
Commit 55ba423

Port to redteaming
1 parent 1023c8b commit 55ba423

5 files changed: +95 −58 lines


.devcontainer/devcontainer.json

Lines changed: 2 additions & 1 deletion
@@ -33,7 +33,8 @@
         "mtxr.sqltools",
         "mtxr.sqltools-driver-pg",
         "ms-vscode.vscode-node-azure-pack",
-        "esbenp.prettier-vscode"
+        "esbenp.prettier-vscode",
+        "twixes.pypi-assistant"
       ],
       // Set *default* container specific settings.json values on container create.
       "settings": {

.vscode/launch.json

Lines changed: 8 additions & 0 deletions
@@ -21,6 +21,14 @@
       "module": "uvicorn",
       "args": ["fastapi_app:create_app", "--factory", "--reload"],
       "justMyCode": false
+    },
+    {
+      "name": "Python: Current File",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal",
+      "justMyCode": false
     }
   ],
   "compounds": [

evals/requirements.txt

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-02-06b
-azure-ai-evaluation
+git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-05-01
+azure-ai-evaluation[redteam]>=1.5.0
 rich
 dotenv-azd
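
The [redteam] extra is what provides the azure.ai.evaluation.red_team module imported by the updated script below. A minimal install sanity check, assuming the pinned requirements above are installed (illustrative snippet, not part of the commit):

# Confirm the redteam extra resolved; import names match the diff below.
from importlib.metadata import version

from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory  # noqa: F401

print(version("azure-ai-evaluation"))  # expect >= 1.5.0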

evals/safety_evaluation.py

Lines changed: 53 additions & 55 deletions
@@ -1,25 +1,23 @@
 import argparse
 import asyncio
-import json
 import logging
 import os
 import pathlib
+import sys
 from enum import Enum
 
 import requests
-from azure.ai.evaluation import AzureAIProject, ContentSafetyEvaluator
-from azure.ai.evaluation.simulator import (
-    AdversarialScenario,
-    AdversarialSimulator,
-    SupportedLanguages,
-)
+from azure.ai.evaluation import AzureAIProject
+from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory
 from azure.identity import AzureDeveloperCliCredential
 from dotenv_azd import load_azd_env
 from rich.logging import RichHandler
-from rich.progress import track
 
 logger = logging.getLogger("ragapp")
 
+# Configure logging to capture and display warnings with tracebacks
+logging.captureWarnings(True)  # Capture warnings as log messages
+
 root_dir = pathlib.Path(__file__).parent
 
@@ -47,11 +45,10 @@ def get_azure_credential():
 
 
 async def callback(
-    messages: dict,
+    messages: list,
     target_url: str = "http://127.0.0.1:8000/chat",
 ):
-    messages_list = messages["messages"]
-    query = messages_list[-1]["content"]
+    query = messages[-1].content
     headers = {"Content-Type": "application/json"}
     body = {
         "messages": [{"content": query, "role": "user"}],
@@ -65,7 +62,7 @@ async def callback(
         message = {"content": response["error"], "role": "assistant"}
     else:
         message = response["message"]
-    return {"messages": messages_list + [message]}
+    return {"messages": messages + [message]}
 
 
 async def run_simulator(target_url: str, max_simulations: int):
@@ -75,50 +72,35 @@ async def run_simulator(target_url: str, max_simulations: int):
         "resource_group_name": os.environ["AZURE_RESOURCE_GROUP"],
         "project_name": os.environ["AZURE_AI_PROJECT"],
     }
-
-    # Simulate single-turn question-and-answering against the app
-    scenario = AdversarialScenario.ADVERSARIAL_QA
-    adversarial_simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=credential)
-
-    outputs = await adversarial_simulator(
-        scenario=scenario,
+    model_red_team = RedTeam(
+        azure_ai_project=azure_ai_project,
+        credential=credential,
+        risk_categories=[
+            RiskCategory.Violence,
+            # RiskCategory.HateUnfairness,
+            # RiskCategory.Sexual,
+            # RiskCategory.SelfHarm,
+        ],
+        num_objectives=1,
+    )
+    await model_red_team.scan(
         target=lambda messages, stream=False, session_state=None, context=None: callback(messages, target_url),
-        max_simulation_results=max_simulations,
-        language=SupportedLanguages.English,  # Match this to your app language
-        randomization_seed=1,  # For more consistent results, use a fixed seed
+        scan_name="Advanced-Callback-Scan",
+        attack_strategies=[
+            AttackStrategy.EASY,  # Group of easy complexity attacks
+            # AttackStrategy.MODERATE,  # Group of moderate complexity attacks
+            # AttackStrategy.CharacterSpace,  # Add character spaces
+            # AttackStrategy.ROT13,  # Use ROT13 encoding
+            # AttackStrategy.UnicodeConfusable,  # Use confusable Unicode characters
+            # AttackStrategy.CharSwap,  # Swap characters in prompts
+            # AttackStrategy.Morse,  # Encode prompts in Morse code
+            # AttackStrategy.Leetspeak,  # Use Leetspeak
+            # AttackStrategy.Url,  # Use URLs in prompts
+            # AttackStrategy.Binary,  # Encode prompts in binary
+        ],
+        output_path="Advanced-Callback-Scan.json",
     )
 
-    # Run safety evaluation on the outputs and save the scores
-    # Do not save the outputs, as they may contain disturbing content
-    evaluators = ["hate_unfairness", "sexual", "violence", "self_harm"]
-    summary_scores = {
-        evaluator: {"low_count": 0, "score_total": 0.0, "mean_score": 0.0, "low_rate": 0.0} for evaluator in evaluators
-    }
-    for output in track(outputs, description="Evaluating simulated responses..."):
-        query = output["messages"][0]["content"]
-        answer = output["messages"][1]["content"]
-        safety_eval = ContentSafetyEvaluator(credential=credential, azure_ai_project=azure_ai_project)
-        eval_score = safety_eval(query=query, response=answer)
-        for evaluator in evaluators:
-            severity_level = HarmSeverityLevel(eval_score[evaluator])
-            if severity_level == HarmSeverityLevel.VeryLow or severity_level == HarmSeverityLevel.Low.name:
-                summary_scores[evaluator]["low_count"] += 1
-            else:
-                logger.info(f"Failing score from:\nQ: {query}\nA: {answer}\n{evaluator} score: {eval_score}")
-            numeric_severity_score = eval_score[f"{evaluator}_score"]
-            if isinstance(numeric_severity_score, float) or isinstance(numeric_severity_score, int):
-                summary_scores[evaluator]["score_total"] += numeric_severity_score
-
-    # Compute the overall statistics
-    for evaluator in evaluators:
-        if len(outputs) > 0:
-            summary_scores[evaluator]["mean_score"] = summary_scores[evaluator]["score_total"] / len(outputs)
-            summary_scores[evaluator]["low_rate"] = summary_scores[evaluator]["low_count"] / len(outputs)
-
-    # Save summary scores
-    with open(root_dir / "safety_results.json", "w") as f:
-        json.dump(summary_scores, f, indent=2)
-
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run safety evaluation simulator.")
@@ -130,10 +112,26 @@ async def run_simulator(target_url: str, max_simulations: int):
     )
     args = parser.parse_args()
 
+    # Configure logging to show tracebacks for warnings and above
     logging.basicConfig(
-        level=logging.WARNING, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)]
+        level=logging.WARNING,
+        format="%(message)s",
+        datefmt="[%X]",
+        handlers=[RichHandler(rich_tracebacks=True, show_path=True)],
     )
+
+    # Set urllib3 and azure libraries to WARNING level to see connection issues
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+    logging.getLogger("azure").setLevel(logging.DEBUG)
+    logging.getLogger("RedTeamLogger").setLevel(logging.DEBUG)
+
+    # Set our application logger to INFO level
     logger.setLevel(logging.INFO)
+
     load_azd_env()
 
-    asyncio.run(run_simulator(args.target_url, args.max_simulations))
+    try:
+        asyncio.run(run_simulator(args.target_url, args.max_simulations))
+    except Exception:
+        logging.exception("Unhandled exception in safety evaluation")
+        sys.exit(1)
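
Note the changed target contract: RedTeam.scan hands the callback a plain list of message objects exposing a .content attribute, rather than the {"messages": [...]} dict the old AdversarialSimulator passed. A minimal offline sketch of a conforming target (an echo responder, assuming the same attribute access as the callback above; illustrative only):

# Echo target matching the callback shape used above: read the last
# adversarial prompt, append an assistant reply, return the transcript.
async def echo_target(messages: list, stream=False, session_state=None, context=None):
    query = messages[-1].content  # message objects expose .content in the new API
    reply = {"content": f"echo: {query}", "role": "assistant"}
    return {"messages": messages + [reply]}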

infra/main.bicep

Lines changed: 30 additions & 0 deletions
@@ -408,6 +408,24 @@ module openAI 'core/ai/cognitiveservices.bicep' = if (deployAzureOpenAI) {
   }
 }
 
+module storage 'br/public:avm/res/storage/storage-account:0.9.1' = if (useAiProject) {
+  name: 'storage'
+  scope: resourceGroup
+  params: {
+    name: '${take(replace(prefix, '-', ''), 17)}storage'
+    location: location
+    tags: tags
+    kind: 'StorageV2'
+    skuName: 'Standard_LRS'
+    networkAcls: {
+      defaultAction: 'Allow'
+      bypass: 'AzureServices'
+    }
+    allowBlobPublicAccess: false
+    allowSharedKeyAccess: false
+  }
+}
+
 module ai 'core/ai/ai-environment.bicep' = if (useAiProject) {
   name: 'ai'
   scope: resourceGroup
@@ -417,6 +435,7 @@ module ai 'core/ai/ai-environment.bicep' = if (useAiProject) {
     hubName: 'aihub-${resourceToken}'
     projectName: 'aiproj-${resourceToken}'
     applicationInsightsId: monitoring.outputs.applicationInsightsId
+    storageAccountId: storage.outputs.resourceId
   }
 }
 
@@ -442,6 +461,17 @@ module openAIRoleBackend 'core/security/role.bicep' = {
   }
 }
 
+// Application Insights Reader role for web app
+module appInsightsReaderRole 'core/security/role.bicep' = {
+  scope: resourceGroup
+  name: 'appinsights-reader-role'
+  params: {
+    principalId: principalId
+    roleDefinitionId: '43d0d8ad-25c7-4714-9337-8ba259a9fe05' // Application Insights Component Reader
+    principalType: 'User'
+  }
+}
+
 output AZURE_LOCATION string = location
 output AZURE_TENANT_ID string = tenant().tenantId
 output AZURE_RESOURCE_GROUP string = resourceGroup.name
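
The resource group and AI project provisioned here reach safety_evaluation.py as azd environment variables via load_azd_env(). A small pre-flight sketch to check them before scanning (AZURE_SUBSCRIPTION_ID is assumed alongside the two variables visible in the diff; illustrative, not part of the commit):

import os

from dotenv_azd import load_azd_env

load_azd_env()  # copy the active azd environment into os.environ
required = ["AZURE_SUBSCRIPTION_ID", "AZURE_RESOURCE_GROUP", "AZURE_AI_PROJECT"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise SystemExit(f"Missing azd environment variables: {', '.join(missing)}")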
