From 7372e570d6bfe5870a7d273935ed068ce139029b Mon Sep 17 00:00:00 2001 From: Victor Nardi Vilella Date: Mon, 23 Jun 2025 00:20:37 -0300 Subject: [PATCH] feat: log when guardrails (relevance, jailbreak) are triggered --- python-backend/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python-backend/main.py b/python-backend/main.py index 1a34002..b9f94ee 100644 --- a/python-backend/main.py +++ b/python-backend/main.py @@ -145,6 +145,8 @@ async def relevance_guardrail( """Guardrail to check if input is relevant to airline topics.""" result = await Runner.run(guardrail_agent, input, context=context.context) final = result.final_output_as(RelevanceOutput) + if not final.is_relevant: + print(f"[Guardrail Triggered] Relevance guardrail activated. Reason: {final.reasoning}") return GuardrailFunctionOutput(output_info=final, tripwire_triggered=not final.is_relevant) class JailbreakOutput(BaseModel): @@ -175,6 +177,8 @@ async def jailbreak_guardrail( """Guardrail to detect jailbreak attempts.""" result = await Runner.run(jailbreak_guardrail_agent, input, context=context.context) final = result.final_output_as(JailbreakOutput) + if not final.is_safe: + print(f"[Guardrail Triggered] Jailbreak guardrail activated. Reason: {final.reasoning}") return GuardrailFunctionOutput(output_info=final, tripwire_triggered=not final.is_safe) # =========================