diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d7b245..2ff861c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **`aicertify demo` rewritten for the canonical rich-UX flow.** The previous demo runner produced plain `print()` output; it now mirrors [`examples/quickstart.py`](examples/quickstart.py) exactly — uses the high-level `application.create()` + `app.evaluate()` API and wraps each step in `print_banner`, `spinner`, `MessageGroup`, and `success` markers from `aicertify.utils.logging_config`. Visually identical to the canonical SDK experience. +- **CLI default verbosity now WARNING, not INFO.** `aicertify demo` and `aicertify evaluate` no longer flood the terminal with INFO-level chatter from `langfair`, `deepeval`, the OPA policy loader, etc. Pass `--verbose` to opt back in (raises root logger to INFO and `aicertify` namespace to DEBUG). +- **OPA `policy_loader` no longer warns on `helper_functions/`** — those `.rego` files are shared library code (reporting helpers, validation helpers), not policies, and were always meant to be skipped silently. Same for dot-prefixed config directories. + +### Added + +- **`docs/demo.cast` + `docs/demo.gif`** — asciinema recording of `aicertify demo` running end-to-end, embedded near the top of the README so visitors see the rich UX before installing anything. + ## [0.7.1] — 2026-05-14 ### Added diff --git a/README.md b/README.md index b08a1e9..ed9dce4 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,10 @@ aicertify demo `aicertify demo` loads a bundled sample contract, evaluates it against the EU AI Act policy set via OPA, and writes `aicertify_demo_report.md` to the current directory. Open the report — that's what your audit deliverable looks like. +

+![aicertify demo recording — banner, spinners, evaluation progress, generated report path](docs/demo.gif)

+ For richer evaluations (LangFair fairness metrics, DeepEval content-safety scoring, PDF reports), see [`examples/quickstart.py`](examples/quickstart.py) and the [forkable example bots](examples/) — each ships an `input_contract.json`, a `policy_config.yaml`, and a `run.py`. ### For development diff --git a/aicertify/_demo/runner.py b/aicertify/_demo/runner.py index d3d5333..f2c4644 100644 --- a/aicertify/_demo/runner.py +++ b/aicertify/_demo/runner.py @@ -4,17 +4,25 @@ vendored policy folder, and writes a Markdown report to the user's CWD. Designed to work after ``pip install aicertify`` with no extra configuration -beyond the OPA binary on PATH. Heavy ML-based evaluators are skipped by -default; the OPA verdict is the substance. +beyond the OPA binary on PATH. Mirrors ``examples/quickstart.py`` exactly: +banner + spinners + MessageGroup + success markers via +``aicertify.utils.logging_config``. + +The evaluation runs through the canonical ``application.create() + +app.evaluate()`` API. Heavy ML evaluators (DeepEval, LangFair) skip +gracefully if OPENAI_API_KEY is unset; the OPA verdict is the substance. """ from __future__ import annotations +import contextlib import json import logging +import os import platform import shutil import sys +import tempfile from importlib.resources import files from pathlib import Path from typing import Optional @@ -26,7 +34,8 @@ DEFAULT_REPORT_NAME = "aicertify_demo_report.md" # Map friendly framework names to the bundled directory under aicertify/opa_policies/ -# that we use to verify the framework is present in the wheel. +# that we use as an existence probe (so the demo fails fast with a clear +# message if the wheel was stripped or the framework name is unknown). 
_BUNDLED_POLICY_PROBE_PATH = { "eu_ai_act": ("international", "eu_ai_act", "v1"), "nist": ("international", "nist", "v1"), @@ -89,13 +98,10 @@ def bundled_policy_path(policy: str) -> Path: Used only as an existence probe so the demo can fail fast with a friendly message if the wheel was stripped or the framework name is unknown. The actual evaluation passes the friendly framework name (e.g. ``eu_ai_act``) - to the lib's ``find_matching_policy_folders``, which then resolves it to - the absolute directory and recurses for ``.rego`` files. + to the high-level ``application.evaluate()`` API, which resolves it. """ probe = _BUNDLED_POLICY_PROBE_PATH.get(policy) if probe is None: - # Unknown friendly name; fall back to treating the input as a - # path relative to opa_policies/. probe = ("opa_policies", *policy.split("/")) else: probe = ("opa_policies", *probe) @@ -111,87 +117,176 @@ async def run_demo( policy: str = DEFAULT_POLICY, ) -> int: """Run the bundled demo. Returns a shell-style exit code.""" + # The OPA-binary check uses only stdlib (shutil.which) so it's safe to + # run BEFORE the stderr redirect — failure messages stay visible. if opa_binary_path() is None: print_opa_install_instructions() return 1 - contract_file = bundled_contract_path() - if not contract_file.exists(): - print( - f"✗ Bundled sample contract missing at {contract_file}. " - f"This is a packaging bug — please file an issue.", - file=sys.stderr, - ) - return 1 - - policy_dir = bundled_policy_path(policy) - if not policy_dir.exists(): - print( - f"✗ Bundled policy directory {policy} not found at {policy_dir}. 
" - f"Try one of: international/eu_ai_act/v1, global/v1, " - f"international/nist/v1", - file=sys.stderr, - ) - return 1 - - # Load sample contract as an AiCertifyContract - from aicertify.api import load_contract - - contract_data = json.loads(contract_file.read_text()) - # load_contract accepts a path; serialise the bundled JSON to a tmp file - # via the API's existing path-based loader so we don't reimplement. - import tempfile - - with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp: - json.dump(contract_data, tmp) - tmp_path = tmp.name - - try: - contract = load_contract(tmp_path) - finally: - Path(tmp_path).unlink(missing_ok=True) - - output_path = Path(output).resolve() - output_dir = output_path.parent - - print( - f"→ Running AICertify demo:\n" - f" contract: {contract.application_name} " - f"({len(contract.interactions)} interactions)\n" - f" policy: {policy}\n" - f" report: {report_format}\n" + # Don't expose CUDA — matches examples/quickstart.py to keep behaviour + # reproducible across machines with and without GPUs. + os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") + + # The downstream evaluators emit a lot of WARNING / ERROR log chatter — + # much of it expected in the no-API-key demo path. Capture stderr to a + # tempfile and only surface it on failure. Started BEFORE any + # ``importlib.resources.files("aicertify…")`` or ``from aicertify`` call + # — those trigger the aicertify package init which eagerly imports the + # OPA policy_loader and emits "Skipping policy file…" warnings. + logging.getLogger().setLevel(logging.WARNING) + saved_stderr_fd = os.dup(2) + captured_stderr = tempfile.NamedTemporaryFile( + mode="w+b", prefix="aicertify-demo-stderr-", delete=False ) + os.dup2(captured_stderr.fileno(), 2) - from aicertify.api import aicertify_app_for_policy + exit_code = 0 + try: + # Remaining bundled-resource probes now run quietly (aicertify package + # init lives in the captured-stderr window). 
+ contract_file = bundled_contract_path() + if not contract_file.exists(): + print( + f"✗ Bundled sample contract missing at {contract_file}. " + f"This is a packaging bug — please file an issue.", + file=sys.__stderr__, + ) + exit_code = 1 + return exit_code + + policy_dir = bundled_policy_path(policy) + if not policy_dir.exists(): + print( + f"✗ Bundled policy directory '{policy}' not found at " + f"{policy_dir}. Try one of: eu_ai_act, nist, global", + file=sys.__stderr__, + ) + exit_code = 1 + return exit_code + + # Deferred imports happen inside the capture so eager-import-time + # warnings go to the tempfile, not the user's terminal. + from aicertify import application, regulations + from aicertify.utils.logging_config import ( + AIC_LOGO, + MessageGroup, + error, + info, + spinner, + success, + print_banner, + ) - # Pass the relative policy name (not the absolute path); the library's - # find_matching_policy_folders() rejects absolute patterns. - results = await aicertify_app_for_policy( - contract=contract, - policy_folder=policy, - output_dir=str(output_dir), - report_format=report_format, - generate_report=True, - ) + print_banner() + info( + "Self-contained demo: bundled sample contract → " + f"{policy} policy set → {report_format} report.", + category="EVALUATION", + ) - # The API writes a timestamped report; surface the path it produced. 
- report_path = results.get("report_path") - if report_path: - print(f"\n✓ Report written to: {report_path}") - print( - f"\nOpen the report to see what an AICertify audit deliverable " - f"looks like.\n" + # Step 1: regulations set + with spinner("Creating regulations set", emoji="🔍"): + regs_set = regulations.create("aicertify-demo") + + try: + with spinner(f"Adding {policy} regulations", emoji="⚖️"): + regs_set.add(policy) + success(f"Loaded {policy} policy set") + except ValueError as exc: + error(f"Could not add regulation '{policy}': {exc}") + exit_code = 2 + return exit_code + + # Step 2: application + interactions from the bundled fixture + contract_data = json.loads(contract_file.read_text()) + model_info = contract_data.get("model_info", {}) + + info( + f"Building application from bundled fixture: " f"{contract_file.name}", + category="APPLICATION", + ) + with spinner( + f"Creating application: {contract_data['application_name']}", + emoji="🤖", + ): + app = application.create( + name=contract_data["application_name"], + model_name=model_info.get("model_name", "demo-model"), + model_version=model_info.get("model_version", "v1"), + model_metadata=model_info.get("metadata", {}), + ) + success(f"Created application: {contract_data['application_name']}") + + interactions = contract_data.get("interactions", []) + with spinner(f"Loading {len(interactions)} bundled interactions", emoji="💬"): + for ix in interactions: + app.add_interaction( + input_text=ix["input_text"], + output_text=ix["output_text"], + ) + success(f"Added {len(interactions)} interactions to the application") + + # Step 3: evaluate + output_path = Path(output).resolve() + output_dir = output_path.parent + output_dir.mkdir(parents=True, exist_ok=True) + + info( + f"\n{AIC_LOGO} Starting evaluation against {policy}", + category="EVALUATION", + ) + with MessageGroup("Evaluation progress") as eval_group: + with spinner( + f"Evaluating {contract_data['application_name']} against {policy}", + 
emoji="🧪", + ): + eval_group.add("Initializing evaluators") + eval_group.add("Loading policy files") + eval_group.add("Running OPA policy evaluation") + await app.evaluate( + regulations=regs_set, + report_format=report_format, + output_dir=str(output_dir), + ) + eval_group.add(f"Writing {report_format} report") + success("Evaluation complete") + + # Step 4: surface the produced report path + reports = app.get_report() + if not reports: + error("Evaluation finished but no report path was returned.") + exit_code = 3 + return exit_code + + for reg_name, report_path in reports.items(): + success(f"Report for {reg_name}: {report_path}") + + success("\n🎉 Demo complete 🎉") + info( + "Open the report above to see what an AICertify audit deliverable " + "looks like — generated, not handwritten." ) return 0 - - err = results.get("error") - if err: - print(f"\n✗ Demo failed: {err}", file=sys.stderr) - return 2 - - print( - "\n⚠ Demo completed but no report path was returned. " - "Check logs above for details.", - file=sys.stderr, - ) - return 3 + except Exception: + exit_code = 99 + raise + finally: + # Restore real stderr + sys.stderr.flush() + os.dup2(saved_stderr_fd, 2) + os.close(saved_stderr_fd) + captured_stderr.flush() + captured_stderr.close() + try: + if exit_code != 0: + # Demo failed — replay the captured chatter for debugging + with open(captured_stderr.name, "rb") as f: + data = f.read() + if data: + sys.stderr.write( + "\n--- captured downstream output (demo failed) ---\n" + ) + sys.stderr.write(data.decode("utf-8", errors="replace")) + finally: + with contextlib.suppress(FileNotFoundError): + Path(captured_stderr.name).unlink() diff --git a/aicertify/cli.py b/aicertify/cli.py index 53fbdda..fdf3393 100644 --- a/aicertify/cli.py +++ b/aicertify/cli.py @@ -211,8 +211,13 @@ def _inject_evaluate_for_legacy_invocation(argv: list) -> list: def main() -> int: + # Quiet by default. 
CLI tools should not flood the terminal with INFO-level + # chatter from downstream libraries (langfair, deepeval, transformers, the + # OPA policy loader, …) unless the user opts in via --verbose. Note: this + # runs BEFORE argparse so it's in effect when the (deferred) aicertify + # package imports happen inside the subcommand handlers. logging.basicConfig( - level=logging.INFO, + level=logging.WARNING, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) @@ -222,6 +227,7 @@ def main() -> int: args = parser.parse_args() if args.verbose: + logging.getLogger().setLevel(logging.INFO) logging.getLogger("aicertify").setLevel(logging.DEBUG) if not hasattr(args, "func"): diff --git a/aicertify/opa_core/policy_loader.py b/aicertify/opa_core/policy_loader.py index 8002890..3e14a99 100644 --- a/aicertify/opa_core/policy_loader.py +++ b/aicertify/opa_core/policy_loader.py @@ -222,9 +222,19 @@ def _load_policies(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]: # Skip legacy paths or unexpected structures if parts[0] not in policies: - logging.warning( - f"Skipping policy file in unrecognized category: {policy_file}" - ) + # ``helper_functions/`` holds shared Rego helpers + # (reporting.rego, validation.rego, …) — not policies, + # so don't warn on them. Same for the version-tracking + # ``.github/``, ``.regal/`` etc. dotfile dirs. + if parts[0] in ("helper_functions",) or parts[0].startswith("."): + logging.debug( + f"Skipping shared-helper / config file (not a policy): " + f"{policy_file}" + ) + else: + logging.warning( + f"Skipping policy file in unrecognized category: {policy_file}" + ) continue category = parts[0] # global, international, etc. 
diff --git a/docs/demo.cast b/docs/demo.cast new file mode 100644 index 0000000..c487569 --- /dev/null +++ b/docs/demo.cast @@ -0,0 +1,135 @@ +{"version": 2, "width": 100, "height": 28, "timestamp": 1778771646, "idle_time_limit": 1.0, "env": {"SHELL": "/usr/bin/zsh", "TERM": "xterm-256color"}} +[10.973202, "o", "INFO: PyTorch version 2.12.0 available.\r\n"] +[15.599447, "o", "INFO: Discovered 20 evaluator classes\r\n"] +[15.599839, "o", "INFO: Registered AccuracyEvaluator for metrics: ['accuracy.score', 'accuracy.precision', 'accuracy.recall']\r\nINFO: Registered BiometricCategorizationEvaluator for metrics: ['biometric.categorization.score', 'biometric.gender.score', 'biometric.ethnicity.score', 'biometric.age.score', 'biometric.disability.score', 'biometric.compliance', 'metrics.biometric.categorization.score']\r\n"] +[15.600143, "o", "INFO: Registered ContentSafetyEvaluator for metrics: ['content_safety.score', 'content_safety.toxic_fraction', 'content_safety.max_toxicity', 'content_safety.toxicity_probability', 'content_safety.categories', 'metrics.content_safety.score', 'metrics.content_safety.toxic_fraction']\r\nINFO: Registered EmotionRecognitionEvaluator for metrics: ['emotion_recognition.score', 'emotion_recognition.workplace_context', 'emotion_recognition.educational_context', 'emotion_recognition.combined_score', 'metrics.emotion_recognition.score', 'metrics.emotion_recognition.workplace_context', 'metrics.emotion_recognition.educational_context']\r\n"] +[15.600442, "o", "INFO: Registered FairnessEvaluator for metrics: ['fairness.score', 'fairness.gender_bias', 'fairness.racial_bias', 'fairness.counterfactual_score', 'fairness.stereotype_score', 'fairness.combined_score', 'fairness.sentiment_bias', 'fairness.race_words_count', 'fairness.gender_words_count', 'fairness.ftu_satisfied', 'metrics.fairness.gender_bias', 'metrics.fairness.score']\r\n"] +[15.600507, "o", "INFO: Registered ManipulationEvaluator for metrics: ['manipulation.score', 
'manipulation.deception_score', 'manipulation.toxicity_score', 'manipulation.compliance', 'metrics.manipulation.score']\r\n"] +[15.600837, "o", "INFO: Registered ModelCardEvaluator for metrics: ['model_card.score', 'model_card.completeness', 'model_card.quality', 'model_card.section_scores', 'model_card.compliance_level', 'metrics.model_card.score', 'metrics.model_card.completeness', 'metrics.model_card.quality']\r\nINFO: Registered RiskManagementEvaluator for metrics: ['risk_management.score', 'risk_management.assessment_score', 'risk_management.mitigation_score', 'risk_management.monitoring_score', 'risk_management.compliance', 'metrics.risk_management.score']\r\n"] +[15.601049, "o", "INFO: Registered SocialScoringEvaluator for metrics: ['social_scoring.score', 'social_scoring.detrimental_treatment', 'social_scoring.combined_score', 'metrics.social_scoring.score', 'metrics.social_scoring.detrimental_treatment']\r\n"] +[15.601296, "o", "INFO: Registered VulnerabilityExploitationEvaluator for metrics: ['vulnerability.score', 'vulnerability.age_score', 'vulnerability.disability_score', 'vulnerability.socioeconomic_score', 'vulnerability.compliance', 'metrics.vulnerability.score']\r\nINFO: Initialized evaluator registry with 10 evaluators for 66 metrics\r\n"] +[15.60166, "o", "INFO: Registered metrics: accuracy.precision, accuracy.recall, accuracy.score, biometric.age.score, biometric.categorization.score, biometric.compliance, biometric.disability.score, biometric.ethnicity.score, biometric.gender.score, content_safety.categories, content_safety.max_toxicity, content_safety.score, content_safety.toxic_fraction, content_safety.toxicity_probability, emotion_recognition.combined_score, emotion_recognition.educational_context, emotion_recognition.score, emotion_recognition.workplace_context, fairness.combined_score, fairness.counterfactual_score, fairness.ftu_satisfied, fairness.gender_bias, fairness.gender_words_count, fairness.race_words_count, fairness.racial_bias, 
fairness.score, fairness.sentiment_bias, fairness.stereotype_score, manipulation.compliance, manipulation.deception_score, manipulation.score, manipulation.toxicity_score, metrics.biometric.categorization.score, metrics.content_safety.score, metrics.content_safety.toxic_fraction, metrics.emotion_recognition.educational_context, metrics.emotion_recognition.score, metrics.emotion_recognition.workplace_context, metrics.fairness.gender_bias, metrics.fairness.score, metrics.manipulation.score, metrics.model_card.completeness, metrics.model_card.quality, metrics.model_card.score, metrics.risk_management.score, metrics.social_scoring.detrimental_treatment, metrics.social_scoring.score, metrics.vulnerability.score, model_card.completeness, model_card.compliance_level, model_card.quality, model_card.score, model_card.section_scores, risk_management.assessment_score, risk_management.compliance, risk_management.mitigation_score, risk_management.monitoring_score, risk_management.score, social_scoring.combined_score, social_scoring.detrimental_treatment, social_scoring.score, vulnerability.age_score, vulnerability.compliance, vulnerability.disability_score, vulnerability.score, vulnerability.socioeconomic_score\r\n"] +[15.603358, "o", "INFO: Found gopal version: 1.0.0\r\n"] +[15.620241, "o", "INFO: Found OPA at fixed Linux path: /usr/local/bin/opa\r\n"] +[15.620953, "o", "INFO: Found gopal version: 1.0.0\r\n"] +[15.654554, "o", "\r\n \u001b[1;34m _ _ ___ _ _ __\u001b[0m\r\n\u001b[1;34m \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m \\ \u001b[0m\u001b[1;34m(\u001b[0m\u001b[1;34m_\u001b[0m\u001b[1;34m)\u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m __\\ ___ _ __| |\u001b[0m\u001b[1;34m_\u001b[0m\u001b[1;34m(\u001b[0m\u001b[1;34m_\u001b[0m\u001b[1;34m)\u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m _|_ _\u001b[0m\r\n\u001b[1;34m \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m _ \\ | \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m 
\u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m _ \\ '__| __| | |_| | | |\u001b[0m\r\n\u001b[1;34m \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m ___ \\| \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m \u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m | __/ | | |_| | _| |_| |\u001b[0m\r\n\u001b[1;34m \u001b[0m\u001b[1;34m/_/\u001b[0m\u001b[1;34m \\_\\_\\\u001b[0m\u001b[1;34m/\u001b[0m\u001b[1;34m \\___|_| \\__|_|_| \\__, |\u001b[0m\r\n\u001b[1;34m |___/ \u001b[0m\r\n \r\n"] +[15.655567, "o", "\u001b[1;34m🔰 AI Certification Framework\u001b[0m\r\n"] +[15.65688, "o", "\u001b[34mValidate and certify AI applications against regulatory requirements\u001b[0m\r\n\r\n"] +[15.658902, "o", "\u001b[34m🔰 🧪 Self-contained demo: bundled sample contract → eu_ai_act policy set → markdown report.\u001b[0m\r\n"] +[15.65997, "o", "\u001b[?25l"] +[15.663462, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🔍 Creating regulations set\u001b[0m"] +[15.763302, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🔍 Creating regulations set\u001b[0m"] +[15.765145, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🔍 Creating regulations set\u001b[0m\r\n\u001b[?25h"] +[15.766234, "o", "\u001b[?25l"] +[15.768313, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m⚖️ Adding eu_ai_act regulations\u001b[0m"] +[15.769991, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m⚖️ Adding eu_ai_act regulations\u001b[0m\r\n\u001b[?25h"] +[15.771182, "o", "\u001b[1;34m✓ Loaded eu_ai_act policy set\u001b[0m\r\n"] +[15.772041, "o", "\u001b[34m🔰 🤖 Building application from bundled fixture: sample_contract.json\u001b[0m\r\n"] +[15.772648, "o", "\u001b[?25l"] +[15.775295, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🤖 Creating application: AICertify Demo Assistant\u001b[0m"] +[15.777031, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🤖 Creating application: AICertify Demo Assistant\u001b[0m\r\n\u001b[?25h"] +[15.77838, "o", "\u001b[1;34m✓ Created application: AICertify Demo Assistant\u001b[0m\r\n"] +[15.779466, "o", 
"\u001b[?25l"] +[15.781208, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m💬 Loading 6 bundled interactions\u001b[0m"] +[15.782784, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m💬 Loading 6 bundled interactions\u001b[0m\r\n\u001b[?25h"] +[15.783548, "o", "\u001b[1;34m✓ Added \u001b[0m\u001b[1;34m6\u001b[0m\u001b[1;34m interactions to the application\u001b[0m\r\n"] +[15.78434, "o", "\u001b[34m🔰 🧪 \u001b[0m\r\n\u001b[34m🔰 Starting evaluation against eu_ai_act\u001b[0m\r\n"] +[15.784684, "o", "\u001b[?25l"] +[15.78588, "o", "\u001b[?25l"] +[15.789068, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[15.896856, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.000039, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.037754, "o", "\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[16.101592, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.203529, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.289832, "o", 
"\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[16.30518, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.407183, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.50875, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.541269, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] 
+[16.609946, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.711647, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.792788, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[16.814007, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[16.91627, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[17.017635, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[17.044022, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m 
\u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[17.118979, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[17.220533, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[17.295227, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[17.37225, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.212032, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 
1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[19.218183, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.32026, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.422726, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.46788, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[19.524593, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.62655, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.719163, "o", 
"\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[19.728031, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.829779, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.930976, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[19.971518, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] 
+[20.033624, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.134962, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.222781, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[20.236206, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.337789, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.439062, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.474196, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m 
\u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[20.540863, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.642996, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.72557, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[20.744181, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.845421, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.947184, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[20.9769, "o", 
"\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[21.048307, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.150029, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.228169, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[21.251386, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] 
+[21.352683, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.454558, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.479855, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[21.555898, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.657242, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.731091, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m 
\u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[21.758436, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.859816, "o", "\r\u001b[2K\u001b[32m⠴\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.961651, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[21.982619, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[22.06301, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.164912, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.233748, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • 
Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[22.266146, "o", "\r\u001b[2K\u001b[32m⠋\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.3677, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.468894, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.484997, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[22.570359, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.672277, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.736164, "o", 
"\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[22.773549, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.875542, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.976904, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[22.987263, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] 
+[23.078218, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.179527, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.238584, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[23.280805, "o", "\r\u001b[2K\u001b[32m⠸\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.38246, "o", "\r\u001b[2K\u001b[32m⠼\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.483776, "o", "\r\u001b[2K\u001b[32m⠦\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.490269, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m 
\u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[23.585029, "o", "\r\u001b[2K\u001b[32m⠧\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.686497, "o", "\r\u001b[2K\u001b[32m⠇\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.741511, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 3 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[23.788256, "o", "\r\u001b[2K\u001b[32m⠏\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.889356, "o", "\r\u001b[2K\u001b[32m⠙\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.990608, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m"] +[23.992671, "o", 
"\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 4 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[2;34m • ... and 1 more\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m"] +[24.013388, "o", "\r\u001b[2K\u001b[32m⠹\u001b[0m \u001b[1;32m🧪 Evaluating AICertify Demo Assistant against eu_ai_act\u001b[0m\r\n\u001b[?25h"] +[24.015151, "o", "\r\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[34m╭──────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[1;34m🔰 Evaluation progress: 4 items\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Initializing evaluators: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Loading policy files: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[34m • Running OPA policy evaluation: 1\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m│\u001b[0m \u001b[2;34m • ... 
and 1 more\u001b[0m \u001b[34m│\u001b[0m\r\n\u001b[34m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\r\n\u001b[?25h"] +[24.015909, "o", "\u001b[34m🔰 Evaluation progress complete: \u001b[0m\u001b[1;34m4\u001b[0m\u001b[34m items processed\u001b[0m\r\n"] +[24.016488, "o", "\u001b[1;34m✓ Evaluation complete\u001b[0m\r\n"] +[24.017467, "o", "\u001b[1;34m✓ Report for eu_ai_act: \u001b[0m\u001b[1;34m/tmp/aicertify-demo-test/\u001b[0m\u001b[1;34mfolder_report_AICertify\u001b[0m\u001b[1;34m Demo \u001b[0m\r\n\u001b[1;34mAssistant_2026-\u001b[0m\u001b[1;34m05\u001b[0m\u001b[1;34m-14_204430.md\u001b[0m\r\n"] +[24.017973, "o", "\u001b[1;34m✓ \u001b[0m\r\n\u001b[1;34m🎉 Demo complete 🎉\u001b[0m\r\n"] +[24.018788, "o", "\u001b[34m🔰 Open the report above to see what an AICertify audit deliverable looks like — generated, not \u001b[0m\r\n\u001b[34mhandwritten.\u001b[0m\r\n"] diff --git a/docs/demo.gif b/docs/demo.gif new file mode 100644 index 0000000..dbcb61e Binary files /dev/null and b/docs/demo.gif differ