From 1bc7370a7589755c556fdecad910d3bc7d600f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 20 Mar 2025 21:56:27 +0000 Subject: [PATCH 01/27] add cleanlab-tlm as a dependency in pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8fc930e..6b36862 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ + "cleanlab-tlm>=1.0.12", "codex-sdk==0.1.0a12", "pydantic>=2.0.0, <3", ] From 2529ae669d5d574565ae1549135a7822afae52b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 20 Mar 2025 23:06:49 +0000 Subject: [PATCH 02/27] Add response validation functionality using TrustworthyRAG --- src/cleanlab_codex/__init__.py | 3 +- src/cleanlab_codex/internal/validator.py | 86 +++++++++++++++++ src/cleanlab_codex/validator.py | 113 +++++++++++++++++++++++ tests/internal/test_validator.py | 55 +++++++++++ 4 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 src/cleanlab_codex/internal/validator.py create mode 100644 src/cleanlab_codex/validator.py create mode 100644 tests/internal/test_validator.py diff --git a/src/cleanlab_codex/__init__.py b/src/cleanlab_codex/__init__.py index d1b8ef6..572a626 100644 --- a/src/cleanlab_codex/__init__.py +++ b/src/cleanlab_codex/__init__.py @@ -2,5 +2,6 @@ from cleanlab_codex.client import Client from cleanlab_codex.codex_tool import CodexTool from cleanlab_codex.project import Project +from cleanlab_codex.validator import Validator -__all__ = ["Client", "CodexTool", "Project"] +__all__ = ["Client", "CodexTool", "Project", "Validator"] diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py new file mode 100644 index 0000000..ff2fd7f --- /dev/null +++ b/src/cleanlab_codex/internal/validator.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, Field + +from cleanlab_codex.utils.errors import MissingDependencyError + +try: + from cleanlab_tlm.utils.rag import Eval, TrustworthyRAGScore, get_default_evals +except ImportError as e: + raise MissingDependencyError( + import_name=e.name or "cleanlab-tlm", + package_url="https://github.com/cleanlab/cleanlab-tlm", + ) from e + + +"""Evaluation metrics (excluding trustworthiness) that are used to determine if a response is bad.""" +EVAL_METRICS = ["response_helpfulness"] + +"""Evaluation metrics that are used to determine if a response is bad.""" +BAD_RESPONSE_EVAL_METRICS = ["trustworthiness", *EVAL_METRICS] + + +class IsBadResponseConfig(BaseModel): + """Config for determining if a response is bad. + Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. + """ + + trustworthiness: float = Field( + description="Threshold for trustworthiness. If the score is below this threshold, the response is bad.", + default=0.5, + ge=0, + le=1, + ) + response_helpfulness: float = Field( + description="Threshold for response helpfulness. If the score is below this threshold, the response is bad.", + default=0.5, + ge=0, + le=1, + ) + + +def get_default_evaluations() -> list[Eval]: + """Get the default evaluations for the TrustworthyRAG. + + Note: + This excludes trustworthiness, which is automatically computed by TrustworthyRAG. 
+ """ + return [evaluation for evaluation in get_default_evals() if evaluation.name in EVAL_METRICS] + + +DEFAULT_IS_BAD_RESPONSE_CONFIG: IsBadResponseConfig = IsBadResponseConfig( + trustworthiness=0.5, + response_helpfulness=0.5, +) + + +DEFAULT_TRUSTWORTHYRAG_CONFIG = { + "options": { + "log": ["explanation"], + }, +} + + +def get_default_trustworthyrag_config() -> dict[str, Any]: + """Get the default configuration for the TrustworthyRAG.""" + return DEFAULT_TRUSTWORTHYRAG_CONFIG + + +def is_bad_response(scores: TrustworthyRAGScore, is_bad_response_config: IsBadResponseConfig | None = None) -> bool: + """ + Check if the response is bad based on the scores computed by TrustworthyRAG and the config containing thresholds. + """ + is_bad_response_config_dict: dict[str, float] = IsBadResponseConfig.model_validate( + is_bad_response_config or DEFAULT_IS_BAD_RESPONSE_CONFIG + ).model_dump() + for eval_metric in BAD_RESPONSE_EVAL_METRICS: + score = scores[eval_metric]["score"] + if score is None: + error_msg = f"Score for {eval_metric} is None" + raise ValueError(error_msg) + threshold = is_bad_response_config_dict[eval_metric] + if score < threshold: + return True + return False diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py new file mode 100644 index 0000000..7949c16 --- /dev/null +++ b/src/cleanlab_codex/validator.py @@ -0,0 +1,113 @@ +""" +Leverage Cleanlab's Evals and Codex to detect and remediate bad responses in RAG applications. +""" + +from __future__ import annotations + +from typing import Any, Optional, cast + +from cleanlab_codex.internal.validator import ( + IsBadResponseConfig, + get_default_evaluations, + get_default_trustworthyrag_config, +) +from cleanlab_codex.internal.validator import is_bad_response as _is_bad_response +from cleanlab_codex.project import Project +from cleanlab_codex.utils.errors import MissingDependencyError + +try: + from cleanlab_tlm import TrustworthyRAG + from cleanlab_tlm.utils.rag import TrustworthyRAGScore +except ImportError as e: + raise MissingDependencyError( + import_name=e.name or "cleanlab-tlm", + package_url="https://github.com/cleanlab/cleanlab-tlm", + ) from e + + +class Validator: + def __init__( + self, + codex_access_key: str, + tlm_api_key: Optional[str] = None, + trustworthy_rag_config: Optional[dict[str, Any]] = None, + is_bad_response_config: Optional[dict[str, float]] = None, + ): + """Evaluates the quality of responses generated in RAG applications and remediates them if needed. + + This object combines Cleanlab's various Evals with thresholding to detect bad responses and remediates them with Codex. + + Args: + codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. + tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). + trustworthy_rag_config (Optional[dict[str, Any]]): The constructor arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). + is_bad_response_config (Optional[dict[str, float]]): The configuration for determining if a response is bad. 
+ """ + trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config() + if tlm_api_key is not None: + trustworthy_rag_config["api_key"] = tlm_api_key + self._is_bad_response_config = IsBadResponseConfig.model_validate(is_bad_response_config or {}) + + self._project: Project = Project.from_access_key(access_key=codex_access_key) + + trustworthy_rag_config.setdefault("evals", get_default_evaluations()) + self._tlm_rag = TrustworthyRAG(**trustworthy_rag_config) + + def validate(self, query: str, context: str, response: str) -> dict[str, Any]: + """Validate the response quality and generate an alternative response if needed. + + Args: + query (str): The user's original query. + context (str): The context provided to generate the response. + response (str): The response to evaluate. + + Returns: + dict[str, Any]: A dictionary containing: + - 'is_bad_response': True if the response is determined to be bad, False otherwise. + - 'alt_answer': The alternative response from Codex, or None if no response could be fetched from Codex. + - Other evaluation metrics from TrustworthyRAG. + """ + scores, is_bad_response = self.detect(query, context, response) + alt_answer = None + if is_bad_response: + alt_answer = self.remediate(query) + + return { + "is_bad_response": is_bad_response, + "alt_answer": alt_answer, + **scores, + } + + def detect(self, query: str, context: str, response: str) -> tuple[TrustworthyRAGScore, bool]: + """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response. + + Args: + query (str): The user's original query. + context (str): The context provided to generate the response. + response (str): The response to evaluate. + + Returns: + tuple[TrustworthyRAGScore, bool]: A tuple containing: + - TrustworthyRAGScore: Quality scores for different evaluation metrics like trustworthiness + and response helpfulness. Each metric has a score between 0-1. + - bool: True if the response is determined to be bad based on the evaluation scores + and configured thresholds, False otherwise. + """ + scores = cast(TrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context)) + _config = ( + IsBadResponseConfig.model_validate(self._is_bad_response_config) if self._is_bad_response_config else None + ) + is_bad_response = _is_bad_response(scores, _config) + return scores, is_bad_response + + def remediate(self, query: str) -> str | None: + """Queries Codex to get an alternative response when the original response is determined to be bad. + + Args: + query (str): The user's original query to get an alternative response for. + + Returns: + str | None: The alternative response from Codex, or None if no response could be fetched from Codex. 
+ """ + codex_answer, _ = self._project.query(question=query) + return codex_answer diff --git a/tests/internal/test_validator.py b/tests/internal/test_validator.py new file mode 100644 index 0000000..d3ce3ab --- /dev/null +++ b/tests/internal/test_validator.py @@ -0,0 +1,55 @@ +from typing import cast + +import pytest +from cleanlab_tlm.utils.rag import TrustworthyRAGScore + +from cleanlab_codex.internal.validator import IsBadResponseConfig, get_default_evaluations, is_bad_response + + +def make_scores(trustworthiness: float, response_helpfulness: float) -> TrustworthyRAGScore: + scores = { + "trustworthiness": { + "score": trustworthiness, + }, + "response_helpfulness": { + "score": response_helpfulness, + }, + } + return cast(TrustworthyRAGScore, scores) + + +def make_is_bad_response_config(trustworthiness: float, response_helpfulness: float) -> IsBadResponseConfig: + return IsBadResponseConfig( + trustworthiness=trustworthiness, + response_helpfulness=response_helpfulness, + ) + + +def test_get_default_evaluations() -> None: + assert {evaluation.name for evaluation in get_default_evaluations()} == {"response_helpfulness"} + + +class TestIsBadResponse: + @pytest.fixture + def scores(self) -> TrustworthyRAGScore: + return make_scores(0.92, 0.75) + + @pytest.fixture + def custom_is_bad_response_config(self) -> IsBadResponseConfig: + return make_is_bad_response_config(0.6, 0.7) + + def test_thresholds(self, scores: TrustworthyRAGScore) -> None: + default_is_bad_response = is_bad_response(scores) + assert not default_is_bad_response + + # High trustworthiness_threshold + is_bad_response_config = make_is_bad_response_config(0.921, 0.5) + assert is_bad_response(scores, is_bad_response_config) + + # High response_helpfulness_threshold + is_bad_response_config = make_is_bad_response_config(0.5, 0.751) + assert is_bad_response(scores, is_bad_response_config) + + def test_scores(self, custom_is_bad_response_config: IsBadResponseConfig) -> None: + scores = make_scores(0.59, 0.7) + assert is_bad_response(scores, custom_is_bad_response_config) From 722d287cae778a2a350e651b9fc93cd3f6062706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Fri, 21 Mar 2025 00:57:10 +0000 Subject: [PATCH 03/27] alt_answer -> expert_answer --- src/cleanlab_codex/validator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 7949c16..5074341 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -64,17 +64,17 @@ def validate(self, query: str, context: str, response: str) -> dict[str, Any]: Returns: dict[str, Any]: A dictionary containing: - 'is_bad_response': True if the response is determined to be bad, False otherwise. - - 'alt_answer': The alternative response from Codex, or None if no response could be fetched from Codex. + - 'expert_answer': The alternative response from Codex, or None if no response could be fetched from Codex. - Other evaluation metrics from TrustworthyRAG. 
""" scores, is_bad_response = self.detect(query, context, response) - alt_answer = None + expert_answer = None if is_bad_response: - alt_answer = self.remediate(query) + expert_answer = self.remediate(query) return { "is_bad_response": is_bad_response, - "alt_answer": alt_answer, + "expert_answer": expert_answer, **scores, } From 6f64a127b5e1b87834ceb3b7eda59d357bb9d800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Fri, 21 Mar 2025 14:10:37 +0000 Subject: [PATCH 04/27] address comments --- src/cleanlab_codex/internal/validator.py | 23 +++++--------------- src/cleanlab_codex/validator.py | 27 ++++++++++++------------ tests/internal/test_validator.py | 10 ++++----- 3 files changed, 23 insertions(+), 37 deletions(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index ff2fd7f..09b91b9 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -22,7 +22,8 @@ BAD_RESPONSE_EVAL_METRICS = ["trustworthiness", *EVAL_METRICS] -class IsBadResponseConfig(BaseModel): + +class BadResponseThresholds(BaseModel): """Config for determining if a response is bad. Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. """ @@ -50,12 +51,6 @@ def get_default_evaluations() -> list[Eval]: return [evaluation for evaluation in get_default_evals() if evaluation.name in EVAL_METRICS] -DEFAULT_IS_BAD_RESPONSE_CONFIG: IsBadResponseConfig = IsBadResponseConfig( - trustworthiness=0.5, - response_helpfulness=0.5, -) - - DEFAULT_TRUSTWORTHYRAG_CONFIG = { "options": { "log": ["explanation"], @@ -68,19 +63,11 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: return DEFAULT_TRUSTWORTHYRAG_CONFIG -def is_bad_response(scores: TrustworthyRAGScore, is_bad_response_config: IsBadResponseConfig | None = None) -> bool: +def is_bad_response(scores: TrustworthyRAGScore, thresholds: dict[str, float]) -> bool: """ Check if the response is bad based on the scores computed by TrustworthyRAG and the config containing thresholds. """ - is_bad_response_config_dict: dict[str, float] = IsBadResponseConfig.model_validate( - is_bad_response_config or DEFAULT_IS_BAD_RESPONSE_CONFIG - ).model_dump() - for eval_metric in BAD_RESPONSE_EVAL_METRICS: - score = scores[eval_metric]["score"] - if score is None: - error_msg = f"Score for {eval_metric} is None" - raise ValueError(error_msg) - threshold = is_bad_response_config_dict[eval_metric] - if score < threshold: + for eval_metric, threshold in thresholds.items(): + if scores[eval_metric]["score"] < threshold: return True return False diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 5074341..08b8739 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -7,7 +7,7 @@ from typing import Any, Optional, cast from cleanlab_codex.internal.validator import ( - IsBadResponseConfig, + BadResponseThresholds, get_default_evaluations, get_default_trustworthyrag_config, ) @@ -31,7 +31,7 @@ def __init__( codex_access_key: str, tlm_api_key: Optional[str] = None, trustworthy_rag_config: Optional[dict[str, Any]] = None, - is_bad_response_config: Optional[dict[str, float]] = None, + bad_response_thresholds: Optional[dict[str, float]] = None, ): """Evaluates the quality of responses generated in RAG applications and remediates them if needed. 
@@ -41,12 +41,12 @@ def __init__( codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). trustworthy_rag_config (Optional[dict[str, Any]]): The constructor arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). - is_bad_response_config (Optional[dict[str, float]]): The configuration for determining if a response is bad. + bad_response_thresholds (Optional[dict[str, float]]): The thresholds for determining if a response is bad. """ trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config() if tlm_api_key is not None: trustworthy_rag_config["api_key"] = tlm_api_key - self._is_bad_response_config = IsBadResponseConfig.model_validate(is_bad_response_config or {}) + self._bad_response_thresholds = BadResponseThresholds.model_validate(bad_response_thresholds or {}) self._project: Project = Project.from_access_key(access_key=codex_access_key) @@ -63,9 +63,9 @@ def validate(self, query: str, context: str, response: str) -> dict[str, Any]: Returns: dict[str, Any]: A dictionary containing: - - 'is_bad_response': True if the response is determined to be bad, False otherwise. - - 'expert_answer': The alternative response from Codex, or None if no response could be fetched from Codex. - - Other evaluation metrics from TrustworthyRAG. + - 'is_bad_response': True if the response is flagged as potentially bad, False otherwise. + - 'expert_answer': Alternate SME-provided answer from Codex, or None if no answer could be found in the Codex Project. + - Raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. """ scores, is_bad_response = self.detect(query, context, response) expert_answer = None @@ -94,20 +94,19 @@ def detect(self, query: str, context: str, response: str) -> tuple[TrustworthyRA and configured thresholds, False otherwise. """ scores = cast(TrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context)) - _config = ( - IsBadResponseConfig.model_validate(self._is_bad_response_config) if self._is_bad_response_config else None - ) - is_bad_response = _is_bad_response(scores, _config) + + thresholds_dict = self._bad_response_thresholds.model_dump() + is_bad_response = _is_bad_response(scores, thresholds_dict) return scores, is_bad_response def remediate(self, query: str) -> str | None: - """Queries Codex to get an alternative response when the original response is determined to be bad. + """Request a SME-provided answer for this query, if one is available in Codex. Args: - query (str): The user's original query to get an alternative response for. + query (str): The user's original query to get SME-provided answer for. Returns: - str | None: The alternative response from Codex, or None if no response could be fetched from Codex. + str | None: The SME-provided answer from Codex, or None if no answer could be found in the Codex Project. 
""" codex_answer, _ = self._project.query(question=query) return codex_answer diff --git a/tests/internal/test_validator.py b/tests/internal/test_validator.py index d3ce3ab..847c31d 100644 --- a/tests/internal/test_validator.py +++ b/tests/internal/test_validator.py @@ -3,7 +3,7 @@ import pytest from cleanlab_tlm.utils.rag import TrustworthyRAGScore -from cleanlab_codex.internal.validator import IsBadResponseConfig, get_default_evaluations, is_bad_response +from cleanlab_codex.internal.validator import BadResponseThresholds, get_default_evaluations, is_bad_response def make_scores(trustworthiness: float, response_helpfulness: float) -> TrustworthyRAGScore: @@ -18,8 +18,8 @@ def make_scores(trustworthiness: float, response_helpfulness: float) -> Trustwor return cast(TrustworthyRAGScore, scores) -def make_is_bad_response_config(trustworthiness: float, response_helpfulness: float) -> IsBadResponseConfig: - return IsBadResponseConfig( +def make_is_bad_response_config(trustworthiness: float, response_helpfulness: float) -> BadResponseThresholds: + return BadResponseThresholds( trustworthiness=trustworthiness, response_helpfulness=response_helpfulness, ) @@ -35,7 +35,7 @@ def scores(self) -> TrustworthyRAGScore: return make_scores(0.92, 0.75) @pytest.fixture - def custom_is_bad_response_config(self) -> IsBadResponseConfig: + def custom_is_bad_response_config(self) -> BadResponseThresholds: return make_is_bad_response_config(0.6, 0.7) def test_thresholds(self, scores: TrustworthyRAGScore) -> None: @@ -50,6 +50,6 @@ def test_thresholds(self, scores: TrustworthyRAGScore) -> None: is_bad_response_config = make_is_bad_response_config(0.5, 0.751) assert is_bad_response(scores, is_bad_response_config) - def test_scores(self, custom_is_bad_response_config: IsBadResponseConfig) -> None: + def test_scores(self, custom_is_bad_response_config: BadResponseThresholds) -> None: scores = make_scores(0.59, 0.7) assert is_bad_response(scores, custom_is_bad_response_config) From a2c0ea58265c0011985a10040cd89efaee5ece59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Fri, 21 Mar 2025 15:43:00 +0000 Subject: [PATCH 05/27] have is_bad_response function take the BadResponseThreshold object instead of a dict --- src/cleanlab_codex/validator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 08b8739..a4baa23 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -95,8 +95,7 @@ def detect(self, query: str, context: str, response: str) -> tuple[TrustworthyRA """ scores = cast(TrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context)) - thresholds_dict = self._bad_response_thresholds.model_dump() - is_bad_response = _is_bad_response(scores, thresholds_dict) + is_bad_response = _is_bad_response(scores, self._bad_response_thresholds) return scores, is_bad_response def remediate(self, query: str) -> str | None: From b8a1e97d7ae4e4fc92015108ebd18b48276c6fbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Sat, 22 Mar 2025 00:44:23 +0000 Subject: [PATCH 06/27] Enhance Validator with flexible thresholds and improved error handling Adds support for custom evaluation thresholds, introduces ThresholdedTrustworthyRAGScore type, and improves validation error handling with better documentation. 
--- src/cleanlab_codex/internal/validator.py | 86 +++++++++++++++++------- src/cleanlab_codex/types/validator.py | 35 ++++++++++ src/cleanlab_codex/validator.py | 63 +++++++++++------ tests/internal/test_validator.py | 3 - 4 files changed, 139 insertions(+), 48 deletions(-) create mode 100644 src/cleanlab_codex/types/validator.py diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index 09b91b9..1c5c54f 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -1,8 +1,8 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any, cast -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from cleanlab_codex.utils.errors import MissingDependencyError @@ -14,13 +14,12 @@ package_url="https://github.com/cleanlab/cleanlab-tlm", ) from e +if TYPE_CHECKING: + from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore -"""Evaluation metrics (excluding trustworthiness) that are used to determine if a response is bad.""" -EVAL_METRICS = ["response_helpfulness"] - -"""Evaluation metrics that are used to determine if a response is bad.""" -BAD_RESPONSE_EVAL_METRICS = ["trustworthiness", *EVAL_METRICS] +"""Evaluation metrics (excluding trustworthiness) that are used to determine if a response is bad.""" +DEFAULT_EVAL_METRICS = ["response_helpfulness"] class BadResponseThresholds(BaseModel): @@ -29,18 +28,53 @@ class BadResponseThresholds(BaseModel): """ trustworthiness: float = Field( - description="Threshold for trustworthiness. If the score is below this threshold, the response is bad.", + description="Threshold for trustworthiness.", default=0.5, - ge=0, - le=1, + ge=0.0, + le=1.0, ) response_helpfulness: float = Field( - description="Threshold for response helpfulness. If the score is below this threshold, the response is bad.", + description="Threshold for response helpfulness.", default=0.5, - ge=0, - le=1, + ge=0.0, + le=1.0, ) + @property + def default_threshold(self) -> float: + """The default threshold to use when a specific evaluation metric's threshold is not set. This threshold is set to 0.5.""" + return 0.5 + + def get_threshold(self, eval_name: str) -> float: + """Get threshold for an eval if it exists. + + For fields defined in the model, returns their value (which may be the field's default). + For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). + """ + + # For fields defined in the model, use their value (which may be the field's default) + if eval_name in self.model_fields: + return cast(float, getattr(self, eval_name)) + + # For custom evals, use the default threshold + return getattr(self, eval_name, self.default_threshold) + + @field_validator("*") + @classmethod + def validate_threshold(cls, v: Any) -> float: + """Validate that all fields (including dynamic ones) are floats between 0 and 1.""" + if not isinstance(v, (int, float)): + error_msg = f"Threshold must be a number, got {type(v)}" + raise TypeError(error_msg) + if not 0 <= float(v) <= 1: + error_msg = f"Threshold must be between 0 and 1, got {v}" + raise ValueError(error_msg) + return float(v) + + model_config = { + "extra": "allow" # Allow additional fields for custom eval thresholds + } + def get_default_evaluations() -> list[Eval]: """Get the default evaluations for the TrustworthyRAG. 
@@ -48,26 +82,28 @@ def get_default_evaluations() -> list[Eval]: Note: This excludes trustworthiness, which is automatically computed by TrustworthyRAG. """ - return [evaluation for evaluation in get_default_evals() if evaluation.name in EVAL_METRICS] - - -DEFAULT_TRUSTWORTHYRAG_CONFIG = { - "options": { - "log": ["explanation"], - }, -} + return [evaluation for evaluation in get_default_evals() if evaluation.name in DEFAULT_EVAL_METRICS] def get_default_trustworthyrag_config() -> dict[str, Any]: """Get the default configuration for the TrustworthyRAG.""" - return DEFAULT_TRUSTWORTHYRAG_CONFIG + return { + "options": { + "log": ["explanation"], + }, + } -def is_bad_response(scores: TrustworthyRAGScore, thresholds: dict[str, float]) -> bool: +def is_bad_response( + scores: TrustworthyRAGScore | ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds +) -> bool: """ Check if the response is bad based on the scores computed by TrustworthyRAG and the config containing thresholds. """ - for eval_metric, threshold in thresholds.items(): - if scores[eval_metric]["score"] < threshold: + for eval_metric, score_dict in scores.items(): + score = score_dict["score"] + if score is None: + continue + if score < thresholds.get_threshold(eval_metric): return True return False diff --git a/src/cleanlab_codex/types/validator.py b/src/cleanlab_codex/types/validator.py new file mode 100644 index 0000000..25b5a1d --- /dev/null +++ b/src/cleanlab_codex/types/validator.py @@ -0,0 +1,35 @@ +from cleanlab_tlm.utils.rag import EvalMetric + + +class ThresholdedEvalMetric(EvalMetric): + is_bad: bool + + +ThresholdedEvalMetric.__doc__ = f""" +{EvalMetric.__doc__} + +is_bad: bool + Whether the score is a certain threshold. +""" + + +class ThresholdedTrustworthyRAGScore(dict[str, ThresholdedEvalMetric]): + """Object returned by `Validator.detect` containing evaluation scores from [TrustworthyRAGScore](/tlm/api/python/utils.rag/#class-trustworthyragscore) + along with a boolean flag, `is_bad`, indicating whether the score is below the threshold. + + Example: + ```python + { + "trustworthiness": { + "score": 0.92, + "log": {"explanation": "Did not find a reason to doubt trustworthiness."}, + "is_bad": False + }, + "response_helpfulness": { + "score": 0.35, + "is_bad": True + }, + ... + } + ``` + """ \ No newline at end of file diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index a4baa23..76bb760 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -13,11 +13,11 @@ ) from cleanlab_codex.internal.validator import is_bad_response as _is_bad_response from cleanlab_codex.project import Project +from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore from cleanlab_codex.utils.errors import MissingDependencyError try: from cleanlab_tlm import TrustworthyRAG - from cleanlab_tlm.utils.rag import TrustworthyRAGScore except ImportError as e: raise MissingDependencyError( import_name=e.name or "cleanlab-tlm", @@ -40,32 +40,47 @@ def __init__( Args: codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). - trustworthy_rag_config (Optional[dict[str, Any]]): The constructor arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). - bad_response_thresholds (Optional[dict[str, float]]): The thresholds for determining if a response is bad. 
+ trustworthy_rag_config (Optional[dict[str, Any]]): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), which is used to detect response issues. + bad_response_thresholds (Optional[dict[str, float]]): Detection score thresholds used to flag whether or not a response is considered bad. Each key in this dict corresponds to an Eval from TrustworthyRAG, and the value indicates a threshold below which scores from this Eval are considered detected issues. A response is flagged as bad if any issues are detected for it. """ trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config() + if tlm_api_key is not None and "api_key" in trustworthy_rag_config: + error_msg = "Cannot specify both tlm_api_key and api_key in trustworthy_rag_config" + raise ValueError(error_msg) if tlm_api_key is not None: trustworthy_rag_config["api_key"] = tlm_api_key - self._bad_response_thresholds = BadResponseThresholds.model_validate(bad_response_thresholds or {}) self._project: Project = Project.from_access_key(access_key=codex_access_key) trustworthy_rag_config.setdefault("evals", get_default_evaluations()) self._tlm_rag = TrustworthyRAG(**trustworthy_rag_config) + # Validate that all the necessary thresholds are present in the TrustworthyRAG. + _evals = [e.name for e in self._tlm_rag.get_evals()] + ["trustworthiness"] + + self._bad_response_thresholds = BadResponseThresholds.model_validate(bad_response_thresholds or {}) + + _threshold_keys = self._bad_response_thresholds.model_dump().keys() + + # Check if there are any thresholds without corresponding evals (this is an error) + _extra_thresholds = set(_threshold_keys) - set(_evals) + if _extra_thresholds: + error_msg = f"Found thresholds for non-existent evaluation metrics: {_extra_thresholds}" + raise ValueError(error_msg) + def validate(self, query: str, context: str, response: str) -> dict[str, Any]: - """Validate the response quality and generate an alternative response if needed. + """Evaluate whether the AI-generated response is bad, and if so, request an alternate expert response. Args: - query (str): The user's original query. - context (str): The context provided to generate the response. - response (str): The response to evaluate. + query (str): The user query that was used to generate the response. + context (str): The context that was retrieved from the RAG Knowledge Base and used to generate the response. + response (str): A reponse from your LLM/RAG system. Returns: dict[str, Any]: A dictionary containing: - 'is_bad_response': True if the response is flagged as potentially bad, False otherwise. - - 'expert_answer': Alternate SME-provided answer from Codex, or None if no answer could be found in the Codex Project. - - Raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. + - 'expert_answer': Alternate SME-provided answer from Codex, or None if no answer could be found in the Codex Project. + - Additional keys: Various keys from a [`ThresholdedTrustworthyRAGScore`](/cleanlab_codex/types/validator/#class-thresholdedtrustworthyragscore) dictionary, with raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. `is_bad` indicating whether the score is below the threshold. 
""" scores, is_bad_response = self.detect(query, context, response) expert_answer = None @@ -78,28 +93,36 @@ def validate(self, query: str, context: str, response: str) -> dict[str, Any]: **scores, } - def detect(self, query: str, context: str, response: str) -> tuple[TrustworthyRAGScore, bool]: + def detect(self, query: str, context: str, response: str) -> tuple[ThresholdedTrustworthyRAGScore, bool]: """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response. Args: - query (str): The user's original query. - context (str): The context provided to generate the response. - response (str): The response to evaluate. + query (str): The user query that was used to generate the response. + context (str): The context that was retrieved from the RAG Knowledge Base and used to generate the response. + response (str): A reponse from your LLM/RAG system. Returns: - tuple[TrustworthyRAGScore, bool]: A tuple containing: - - TrustworthyRAGScore: Quality scores for different evaluation metrics like trustworthiness - and response helpfulness. Each metric has a score between 0-1. + tuple[ThresholdedTrustworthyRAGScore, bool]: A tuple containing: + - ThresholdedTrustworthyRAGScore: Quality scores for different evaluation metrics like trustworthiness + and response helpfulness. Each metric has a score between 0-1. It also has a boolean flag, `is_bad` indicating whether the score is below a given threshold. - bool: True if the response is determined to be bad based on the evaluation scores and configured thresholds, False otherwise. """ - scores = cast(TrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context)) - + scores = cast( + ThresholdedTrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context) + ) + + # Enhance each score dictionary with its threshold check + for eval_name, score_dict in scores.items(): + score_dict.setdefault("is_bad", False) + if (score := score_dict["score"]) is not None: + score_dict["is_bad"] = score < self._bad_response_thresholds.get_threshold(eval_name) + is_bad_response = _is_bad_response(scores, self._bad_response_thresholds) return scores, is_bad_response def remediate(self, query: str) -> str | None: - """Request a SME-provided answer for this query, if one is available in Codex. + """Request a SME-provided answer for this query, if one is available in Codex. Args: query (str): The user's original query to get SME-provided answer for. 
diff --git a/tests/internal/test_validator.py b/tests/internal/test_validator.py index 847c31d..45b9719 100644 --- a/tests/internal/test_validator.py +++ b/tests/internal/test_validator.py @@ -39,9 +39,6 @@ def custom_is_bad_response_config(self) -> BadResponseThresholds: return make_is_bad_response_config(0.6, 0.7) def test_thresholds(self, scores: TrustworthyRAGScore) -> None: - default_is_bad_response = is_bad_response(scores) - assert not default_is_bad_response - # High trustworthiness_threshold is_bad_response_config = make_is_bad_response_config(0.921, 0.5) assert is_bad_response(scores, is_bad_response_config) From db5fe24e5d31c2d31ed130617b0ed5b3979c454b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Sat, 22 Mar 2025 00:49:57 +0000 Subject: [PATCH 07/27] move BadResponseThresholds --- src/cleanlab_codex/internal/validator.py | 62 ++---------------------- src/cleanlab_codex/types/validator.py | 2 +- src/cleanlab_codex/validator.py | 57 +++++++++++++++++++++- tests/internal/test_validator.py | 3 +- 4 files changed, 63 insertions(+), 61 deletions(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index 1c5c54f..0526f9e 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -1,8 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, cast - -from pydantic import BaseModel, Field, field_validator +from typing import TYPE_CHECKING, Any from cleanlab_codex.utils.errors import MissingDependencyError @@ -16,66 +14,13 @@ if TYPE_CHECKING: from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore + from cleanlab_codex.validator import BadResponseThresholds """Evaluation metrics (excluding trustworthiness) that are used to determine if a response is bad.""" DEFAULT_EVAL_METRICS = ["response_helpfulness"] -class BadResponseThresholds(BaseModel): - """Config for determining if a response is bad. - Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. - """ - - trustworthiness: float = Field( - description="Threshold for trustworthiness.", - default=0.5, - ge=0.0, - le=1.0, - ) - response_helpfulness: float = Field( - description="Threshold for response helpfulness.", - default=0.5, - ge=0.0, - le=1.0, - ) - - @property - def default_threshold(self) -> float: - """The default threshold to use when a specific evaluation metric's threshold is not set. This threshold is set to 0.5.""" - return 0.5 - - def get_threshold(self, eval_name: str) -> float: - """Get threshold for an eval if it exists. - - For fields defined in the model, returns their value (which may be the field's default). - For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). 
- """ - - # For fields defined in the model, use their value (which may be the field's default) - if eval_name in self.model_fields: - return cast(float, getattr(self, eval_name)) - - # For custom evals, use the default threshold - return getattr(self, eval_name, self.default_threshold) - - @field_validator("*") - @classmethod - def validate_threshold(cls, v: Any) -> float: - """Validate that all fields (including dynamic ones) are floats between 0 and 1.""" - if not isinstance(v, (int, float)): - error_msg = f"Threshold must be a number, got {type(v)}" - raise TypeError(error_msg) - if not 0 <= float(v) <= 1: - error_msg = f"Threshold must be between 0 and 1, got {v}" - raise ValueError(error_msg) - return float(v) - - model_config = { - "extra": "allow" # Allow additional fields for custom eval thresholds - } - - def get_default_evaluations() -> list[Eval]: """Get the default evaluations for the TrustworthyRAG. @@ -95,7 +40,8 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: def is_bad_response( - scores: TrustworthyRAGScore | ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds + scores: TrustworthyRAGScore | ThresholdedTrustworthyRAGScore, + thresholds: BadResponseThresholds, ) -> bool: """ Check if the response is bad based on the scores computed by TrustworthyRAG and the config containing thresholds. diff --git a/src/cleanlab_codex/types/validator.py b/src/cleanlab_codex/types/validator.py index 25b5a1d..930273f 100644 --- a/src/cleanlab_codex/types/validator.py +++ b/src/cleanlab_codex/types/validator.py @@ -32,4 +32,4 @@ class ThresholdedTrustworthyRAGScore(dict[str, ThresholdedEvalMetric]): ... } ``` - """ \ No newline at end of file + """ diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 76bb760..92251a3 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -6,8 +6,9 @@ from typing import Any, Optional, cast +from pydantic import BaseModel, Field, field_validator + from cleanlab_codex.internal.validator import ( - BadResponseThresholds, get_default_evaluations, get_default_trustworthyrag_config, ) @@ -25,6 +26,60 @@ ) from e +class BadResponseThresholds(BaseModel): + """Config for determining if a response is bad. + Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. + """ + + trustworthiness: float = Field( + description="Threshold for trustworthiness.", + default=0.5, + ge=0.0, + le=1.0, + ) + response_helpfulness: float = Field( + description="Threshold for response helpfulness.", + default=0.5, + ge=0.0, + le=1.0, + ) + + @property + def default_threshold(self) -> float: + """The default threshold to use when a specific evaluation metric's threshold is not set. This threshold is set to 0.5.""" + return 0.5 + + def get_threshold(self, eval_name: str) -> float: + """Get threshold for an eval if it exists. + + For fields defined in the model, returns their value (which may be the field's default). + For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). 
+ """ + + # For fields defined in the model, use their value (which may be the field's default) + if eval_name in self.model_fields: + return cast(float, getattr(self, eval_name)) + + # For custom evals, use the default threshold + return getattr(self, eval_name, self.default_threshold) + + @field_validator("*") + @classmethod + def validate_threshold(cls, v: Any) -> float: + """Validate that all fields (including dynamic ones) are floats between 0 and 1.""" + if not isinstance(v, (int, float)): + error_msg = f"Threshold must be a number, got {type(v)}" + raise TypeError(error_msg) + if not 0 <= float(v) <= 1: + error_msg = f"Threshold must be between 0 and 1, got {v}" + raise ValueError(error_msg) + return float(v) + + model_config = { + "extra": "allow" # Allow additional fields for custom eval thresholds + } + + class Validator: def __init__( self, diff --git a/tests/internal/test_validator.py b/tests/internal/test_validator.py index 45b9719..a2f3146 100644 --- a/tests/internal/test_validator.py +++ b/tests/internal/test_validator.py @@ -3,7 +3,8 @@ import pytest from cleanlab_tlm.utils.rag import TrustworthyRAGScore -from cleanlab_codex.internal.validator import BadResponseThresholds, get_default_evaluations, is_bad_response +from cleanlab_codex.internal.validator import get_default_evaluations, is_bad_response +from cleanlab_codex.validator import BadResponseThresholds def make_scores(trustworthiness: float, response_helpfulness: float) -> TrustworthyRAGScore: From 29e231abb8b5d13b1cc3eb46fcb8177ed9913650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Mon, 24 Mar 2025 11:32:53 -0700 Subject: [PATCH 08/27] add prompt and form_prompt --- src/cleanlab_codex/internal/validator.py | 9 ++++++ src/cleanlab_codex/validator.py | 38 +++++++++++++++++------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index 0526f9e..3cc55b9 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -39,6 +39,15 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: } +def update_scores_based_on_thresholds(scores: dict[dict[str, Any]], thresholds: BadResponseThresholds) -> None: + """Adds a `""" + detection_flag = "is_bad" + for eval_name, score_dict in scores.items(): + score_dict.setdefault(detection_flag, False) + if (score := score_dict["score"]) is not None: + score_dict[detection_flag] = score < thresholds.get_threshold(eval_name) + + def is_bad_response( scores: TrustworthyRAGScore | ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds, diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 92251a3..2929845 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -4,7 +4,7 @@ from __future__ import annotations -from typing import Any, Optional, cast +from typing import Any, Callable, Optional, cast from pydantic import BaseModel, Field, field_validator @@ -13,6 +13,7 @@ get_default_trustworthyrag_config, ) from cleanlab_codex.internal.validator import is_bad_response as _is_bad_response +from cleanlab_codex.internal.validator import update_scores_based_on_thresholds as _update_scores_based_on_thresholds from cleanlab_codex.project import Project from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore from cleanlab_codex.utils.errors import MissingDependencyError @@ -123,7 +124,14 @@ def __init__( error_msg = f"Found thresholds for 
non-existent evaluation metrics: {_extra_thresholds}" raise ValueError(error_msg) - def validate(self, query: str, context: str, response: str) -> dict[str, Any]: + def validate( + self, + query: str, + context: str, + response: str, + prompt: Optional[str] = None, + form_prompt: Optional[Callable[[str, str], str]] = None, + ) -> dict[str, Any]: """Evaluate whether the AI-generated response is bad, and if so, request an alternate expert response. Args: @@ -137,7 +145,7 @@ def validate(self, query: str, context: str, response: str) -> dict[str, Any]: - 'expert_answer': Alternate SME-provided answer from Codex, or None if no answer could be found in the Codex Project. - Additional keys: Various keys from a [`ThresholdedTrustworthyRAGScore`](/cleanlab_codex/types/validator/#class-thresholdedtrustworthyragscore) dictionary, with raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. `is_bad` indicating whether the score is below the threshold. """ - scores, is_bad_response = self.detect(query, context, response) + scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt) expert_answer = None if is_bad_response: expert_answer = self.remediate(query) @@ -148,7 +156,14 @@ def validate(self, query: str, context: str, response: str) -> dict[str, Any]: **scores, } - def detect(self, query: str, context: str, response: str) -> tuple[ThresholdedTrustworthyRAGScore, bool]: + def detect( + self, + query: str, + context: str, + response: str, + prompt: Optional[str] = None, + form_prompt: Optional[Callable[[str, str], str]] = None, + ) -> tuple[ThresholdedTrustworthyRAGScore, bool]: """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response. Args: @@ -164,14 +179,17 @@ def detect(self, query: str, context: str, response: str) -> tuple[ThresholdedTr and configured thresholds, False otherwise. 
""" scores = cast( - ThresholdedTrustworthyRAGScore, self._tlm_rag.score(response=response, query=query, context=context) + ThresholdedTrustworthyRAGScore, + self._tlm_rag.score( + response=response, + query=query, + context=context, + prompt=prompt, + form_prompt=form_prompt, + ), ) - # Enhance each score dictionary with its threshold check - for eval_name, score_dict in scores.items(): - score_dict.setdefault("is_bad", False) - if (score := score_dict["score"]) is not None: - score_dict["is_bad"] = score < self._bad_response_thresholds.get_threshold(eval_name) + _update_scores_based_on_thresholds(scores, thresholds=self._bad_response_thresholds) is_bad_response = _is_bad_response(scores, self._bad_response_thresholds) return scores, is_bad_response From a741e159a99eb6b77c149c16911e373d618525d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Mon, 24 Mar 2025 11:46:56 -0700 Subject: [PATCH 09/27] fix formatting and type hints --- src/cleanlab_codex/internal/validator.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index 3cc55b9..67ceb12 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -39,13 +39,14 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: } -def update_scores_based_on_thresholds(scores: dict[dict[str, Any]], thresholds: BadResponseThresholds) -> None: - """Adds a `""" - detection_flag = "is_bad" +def update_scores_based_on_thresholds( + scores: ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds +) -> None: + """Adds a `is_bad` flag to the scores dictionary.""" for eval_name, score_dict in scores.items(): - score_dict.setdefault(detection_flag, False) + score_dict.setdefault("is_bad", False) if (score := score_dict["score"]) is not None: - score_dict[detection_flag] = score < thresholds.get_threshold(eval_name) + score_dict["is_bad"] = score < thresholds.get_threshold(eval_name) def is_bad_response( From 380b1efdadbe88f5032240fc15a5d18cce8d7f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Mon, 24 Mar 2025 11:55:49 -0700 Subject: [PATCH 10/27] update docstrings --- src/cleanlab_codex/internal/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index 67ceb12..c9b97de 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -42,7 +42,7 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: def update_scores_based_on_thresholds( scores: ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds ) -> None: - """Adds a `is_bad` flag to the scores dictionary.""" + """Adds a `is_bad` flag to the scores dictionaries based on the thresholds.""" for eval_name, score_dict in scores.items(): score_dict.setdefault("is_bad", False) if (score := score_dict["score"]) is not None: From 4f40e3d89ef5f2fabf1b70562ea6a5d4d3fd0b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Mon, 24 Mar 2025 21:18:50 -0700 Subject: [PATCH 11/27] Add unit tests for Validator and BadResponseThresholds --- tests/test_validator.py | 172 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 tests/test_validator.py diff --git a/tests/test_validator.py b/tests/test_validator.py new file mode 100644 index 0000000..cc74972 --- /dev/null +++ 
b/tests/test_validator.py @@ -0,0 +1,172 @@ +from unittest.mock import Mock, patch + +from pydantic import ValidationError +import pytest + +from cleanlab_codex.validator import BadResponseThresholds, Validator + + +class TestBadResponseThresholds: + def test_get_threshold(self): + thresholds = BadResponseThresholds( + trustworthiness=0.5, + response_helpfulness=0.5, + ) + assert thresholds.get_threshold("trustworthiness") == 0.5 + assert thresholds.get_threshold("response_helpfulness") == 0.5 + + def test_default_threshold(self): + thresholds = BadResponseThresholds() + assert thresholds.get_threshold("trustworthiness") == 0.5 + assert thresholds.get_threshold("response_helpfulness") == 0.5 + + def test_unspecified_threshold(self): + thresholds = BadResponseThresholds() + assert thresholds.get_threshold("unspecified_threshold") == 0.5 + + def test_threshold_value(self): + thresholds = BadResponseThresholds(valid_threshold=0.3) + assert thresholds.get_threshold("valid_threshold") == 0.3 + assert thresholds.valid_threshold == 0.3 + + def test_invalid_threshold_value(self): + with pytest.raises(ValidationError): + BadResponseThresholds(trustworthiness=1.1) + + with pytest.raises(ValidationError): + BadResponseThresholds(response_helpfulness=-0.1) + + def test_invalid_threshold_type(self): + with pytest.raises(ValidationError): + BadResponseThresholds(trustworthiness="not a number") + + +@pytest.fixture +def mock_project(): + with patch("cleanlab_codex.validator.Project") as mock: + mock.from_access_key.return_value = Mock() + yield mock + + +@pytest.fixture +def mock_trustworthy_rag(): + mock = Mock() + mock.score.return_value = { + "trustworthiness": { + "score": 0.8, + "is_bad": False + }, + "response_helpfulness": { + "score": 0.7, + "is_bad": False + } + } + eval_mock = Mock() + eval_mock.name = "response_helpfulness" + mock.get_evals.return_value = [eval_mock] + with patch("cleanlab_codex.validator.TrustworthyRAG") as mock_class: + mock_class.return_value = mock + yield mock_class + + +class TestValidator: + def test_init(self, mock_project, mock_trustworthy_rag): + Validator(codex_access_key="test") + + # Verify Project was initialized with access key + mock_project.from_access_key.assert_called_once_with(access_key="test") + + # Verify TrustworthyRAG was initialized with default config + mock_trustworthy_rag.assert_called_once() + + def test_init_with_tlm_api_key(self, mock_project, mock_trustworthy_rag): + Validator(codex_access_key="test", tlm_api_key="tlm-key") + + # Verify TrustworthyRAG was initialized with API key + config = mock_trustworthy_rag.call_args[1] + assert config["api_key"] == "tlm-key" + + def test_init_with_config_conflict(self, mock_project, mock_trustworthy_rag): + with pytest.raises(ValueError, match="Cannot specify both tlm_api_key and api_key in trustworthy_rag_config"): + Validator( + codex_access_key="test", + tlm_api_key="tlm-key", + trustworthy_rag_config={"api_key": "config-key"} + ) + + def test_validate(self, mock_project, mock_trustworthy_rag): + validator = Validator(codex_access_key="test") + + result = validator.validate( + query="test query", + context="test context", + response="test response" + ) + + # Verify TrustworthyRAG.score was called + mock_trustworthy_rag.return_value.score.assert_called_once_with( + response="test response", + query="test query", + context="test context", + prompt=None, + form_prompt=None + ) + + # Verify expected result structure + assert result["is_bad_response"] is False + assert result["expert_answer"] is None + + 
eval_metrics = ["trustworthiness", "response_helpfulness"] + for metric in eval_metrics: + assert metric in result + assert "score" in result[metric] + assert "is_bad" in result[metric] + + def test_validate_expert_answer(self, mock_project, mock_trustworthy_rag): + # Setup mock project query response + mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) + + # Basically any response will be flagged as untrustworthy + validator = Validator(codex_access_key="test", bad_response_thresholds={"trustworthiness": 1.0}) + result = validator.validate( + query="test query", + context="test context", + response="test response" + ) + assert result["expert_answer"] == "expert answer" + + mock_project.from_access_key.return_value.query.return_value = (None, None) + result = validator.validate( + query="test query", + context="test context", + response="test response" + ) + assert result["expert_answer"] is None + + + def test_detect(self, mock_project, mock_trustworthy_rag): + validator = Validator(codex_access_key="test") + + scores, is_bad = validator.detect( + query="test query", + context="test context", + response="test response" + ) + + # Verify scores match mock return value + assert scores["trustworthiness"]["score"] == 0.8 + assert scores["response_helpfulness"]["score"] == 0.7 + assert not is_bad # Since mock scores are above default thresholds + + def test_remediate(self, mock_project, mock_trustworthy_rag): + # Setup mock project query response + mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) + + validator = Validator(codex_access_key="test") + result = validator.remediate("test query") + + # Verify project.query was called + mock_project.from_access_key.return_value.query.assert_called_once_with( + question="test query" + ) + assert result == "expert answer" \ No newline at end of file From 02b16e0ca1e1ad6020f9f211a8d0f462c9204308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Mon, 24 Mar 2025 21:24:43 -0700 Subject: [PATCH 12/27] include type hints and fix formatting --- tests/test_validator.py | 134 +++++++++++++++------------------------- 1 file changed, 51 insertions(+), 83 deletions(-) diff --git a/tests/test_validator.py b/tests/test_validator.py index cc74972..abcff32 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,13 +1,14 @@ +from typing import Generator from unittest.mock import Mock, patch -from pydantic import ValidationError import pytest +from pydantic import ValidationError from cleanlab_codex.validator import BadResponseThresholds, Validator class TestBadResponseThresholds: - def test_get_threshold(self): + def test_get_threshold(self) -> None: thresholds = BadResponseThresholds( trustworthiness=0.5, response_helpfulness=0.5, @@ -15,51 +16,45 @@ def test_get_threshold(self): assert thresholds.get_threshold("trustworthiness") == 0.5 assert thresholds.get_threshold("response_helpfulness") == 0.5 - def test_default_threshold(self): + def test_default_threshold(self) -> None: thresholds = BadResponseThresholds() assert thresholds.get_threshold("trustworthiness") == 0.5 assert thresholds.get_threshold("response_helpfulness") == 0.5 - def test_unspecified_threshold(self): + def test_unspecified_threshold(self) -> None: thresholds = BadResponseThresholds() assert thresholds.get_threshold("unspecified_threshold") == 0.5 - - def test_threshold_value(self): - thresholds = BadResponseThresholds(valid_threshold=0.3) + + def test_threshold_value(self) -> None: + 
thresholds = BadResponseThresholds(valid_threshold=0.3) # type: ignore assert thresholds.get_threshold("valid_threshold") == 0.3 - assert thresholds.valid_threshold == 0.3 + assert thresholds.valid_threshold == 0.3 # type: ignore - def test_invalid_threshold_value(self): + def test_invalid_threshold_value(self) -> None: with pytest.raises(ValidationError): BadResponseThresholds(trustworthiness=1.1) - + with pytest.raises(ValidationError): BadResponseThresholds(response_helpfulness=-0.1) - def test_invalid_threshold_type(self): + def test_invalid_threshold_type(self) -> None: with pytest.raises(ValidationError): - BadResponseThresholds(trustworthiness="not a number") + BadResponseThresholds(trustworthiness="not a number") # type: ignore @pytest.fixture -def mock_project(): +def mock_project() -> Generator[Mock, None, None]: with patch("cleanlab_codex.validator.Project") as mock: mock.from_access_key.return_value = Mock() yield mock @pytest.fixture -def mock_trustworthy_rag(): +def mock_trustworthy_rag() -> Generator[Mock, None, None]: mock = Mock() mock.score.return_value = { - "trustworthiness": { - "score": 0.8, - "is_bad": False - }, - "response_helpfulness": { - "score": 0.7, - "is_bad": False - } + "trustworthiness": {"score": 0.8, "is_bad": False}, + "response_helpfulness": {"score": 0.7, "is_bad": False}, } eval_mock = Mock() eval_mock.name = "response_helpfulness" @@ -70,103 +65,76 @@ def mock_trustworthy_rag(): class TestValidator: - def test_init(self, mock_project, mock_trustworthy_rag): + def test_init(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: Validator(codex_access_key="test") - + # Verify Project was initialized with access key mock_project.from_access_key.assert_called_once_with(access_key="test") - + # Verify TrustworthyRAG was initialized with default config mock_trustworthy_rag.assert_called_once() - - def test_init_with_tlm_api_key(self, mock_project, mock_trustworthy_rag): + + def test_init_with_tlm_api_key(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 Validator(codex_access_key="test", tlm_api_key="tlm-key") - + # Verify TrustworthyRAG was initialized with API key config = mock_trustworthy_rag.call_args[1] assert config["api_key"] == "tlm-key" - - def test_init_with_config_conflict(self, mock_project, mock_trustworthy_rag): + + def test_init_with_config_conflict(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 with pytest.raises(ValueError, match="Cannot specify both tlm_api_key and api_key in trustworthy_rag_config"): - Validator( - codex_access_key="test", - tlm_api_key="tlm-key", - trustworthy_rag_config={"api_key": "config-key"} - ) - - def test_validate(self, mock_project, mock_trustworthy_rag): + Validator(codex_access_key="test", tlm_api_key="tlm-key", trustworthy_rag_config={"api_key": "config-key"}) + + def test_validate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 validator = Validator(codex_access_key="test") - - result = validator.validate( - query="test query", - context="test context", - response="test response" - ) - + + result = validator.validate(query="test query", context="test context", response="test response") + # Verify TrustworthyRAG.score was called mock_trustworthy_rag.return_value.score.assert_called_once_with( - response="test response", - query="test query", - context="test context", - prompt=None, - form_prompt=None + response="test response", query="test query", context="test context", prompt=None, form_prompt=None ) - + # 
Verify expected result structure assert result["is_bad_response"] is False assert result["expert_answer"] is None - + eval_metrics = ["trustworthiness", "response_helpfulness"] for metric in eval_metrics: assert metric in result assert "score" in result[metric] assert "is_bad" in result[metric] - - def test_validate_expert_answer(self, mock_project, mock_trustworthy_rag): + + def test_validate_expert_answer(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 # Setup mock project query response mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) - + # Basically any response will be flagged as untrustworthy validator = Validator(codex_access_key="test", bad_response_thresholds={"trustworthiness": 1.0}) - result = validator.validate( - query="test query", - context="test context", - response="test response" - ) + result = validator.validate(query="test query", context="test context", response="test response") assert result["expert_answer"] == "expert answer" - + mock_project.from_access_key.return_value.query.return_value = (None, None) - result = validator.validate( - query="test query", - context="test context", - response="test response" - ) + result = validator.validate(query="test query", context="test context", response="test response") assert result["expert_answer"] is None - - - def test_detect(self, mock_project, mock_trustworthy_rag): + + def test_detect(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 validator = Validator(codex_access_key="test") - - scores, is_bad = validator.detect( - query="test query", - context="test context", - response="test response" - ) - + + scores, is_bad = validator.detect(query="test query", context="test context", response="test response") + # Verify scores match mock return value assert scores["trustworthiness"]["score"] == 0.8 assert scores["response_helpfulness"]["score"] == 0.7 assert not is_bad # Since mock scores are above default thresholds - - def test_remediate(self, mock_project, mock_trustworthy_rag): + + def test_remediate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002 # Setup mock project query response mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) - + validator = Validator(codex_access_key="test") result = validator.remediate("test query") - + # Verify project.query was called - mock_project.from_access_key.return_value.query.assert_called_once_with( - question="test query" - ) - assert result == "expert answer" \ No newline at end of file + mock_project.from_access_key.return_value.query.assert_called_once_with(question="test query") + assert result == "expert answer" From 873f55218954da741a9150437c07941bf8c78c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Tue, 25 Mar 2025 14:45:50 -0700 Subject: [PATCH 13/27] set "expert_answer" as first key --- src/cleanlab_codex/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 2929845..855fd6e 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -151,8 +151,8 @@ def validate( expert_answer = self.remediate(query) return { - "is_bad_response": is_bad_response, "expert_answer": expert_answer, + "is_bad_response": is_bad_response, **scores, } From b4713712299ae4f18767c805bb8fda28d605aea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Tue, 25 Mar 2025 16:48:39 
-0700 Subject: [PATCH 14/27] clean up imports, type hints and docs --- src/cleanlab_codex/internal/validator.py | 48 +++++++++--------------- src/cleanlab_codex/validator.py | 45 ++++++++++------------ tests/internal/test_validator.py | 26 +------------ 3 files changed, 38 insertions(+), 81 deletions(-) diff --git a/src/cleanlab_codex/internal/validator.py b/src/cleanlab_codex/internal/validator.py index c9b97de..0914c02 100644 --- a/src/cleanlab_codex/internal/validator.py +++ b/src/cleanlab_codex/internal/validator.py @@ -1,19 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional, Sequence, cast -from cleanlab_codex.utils.errors import MissingDependencyError +from cleanlab_tlm.utils.rag import Eval, TrustworthyRAGScore, get_default_evals -try: - from cleanlab_tlm.utils.rag import Eval, TrustworthyRAGScore, get_default_evals -except ImportError as e: - raise MissingDependencyError( - import_name=e.name or "cleanlab-tlm", - package_url="https://github.com/cleanlab/cleanlab-tlm", - ) from e +from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore if TYPE_CHECKING: - from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore from cleanlab_codex.validator import BadResponseThresholds @@ -40,26 +33,21 @@ def get_default_trustworthyrag_config() -> dict[str, Any]: def update_scores_based_on_thresholds( - scores: ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds -) -> None: + scores: TrustworthyRAGScore | Sequence[TrustworthyRAGScore], thresholds: BadResponseThresholds +) -> ThresholdedTrustworthyRAGScore: """Adds a `is_bad` flag to the scores dictionaries based on the thresholds.""" - for eval_name, score_dict in scores.items(): - score_dict.setdefault("is_bad", False) - if (score := score_dict["score"]) is not None: - score_dict["is_bad"] = score < thresholds.get_threshold(eval_name) + # Helper function to check if a score is bad + def is_bad(score: Optional[float], threshold: float) -> bool: + return score is not None and score < threshold -def is_bad_response( - scores: TrustworthyRAGScore | ThresholdedTrustworthyRAGScore, - thresholds: BadResponseThresholds, -) -> bool: - """ - Check if the response is bad based on the scores computed by TrustworthyRAG and the config containing thresholds. 
- """ - for eval_metric, score_dict in scores.items(): - score = score_dict["score"] - if score is None: - continue - if score < thresholds.get_threshold(eval_metric): - return True - return False + if isinstance(scores, Sequence): + raise NotImplementedError("Batching is not supported yet.") + + thresholded_scores = {} + for eval_name, score_dict in scores.items(): + thresholded_scores[eval_name] = { + **score_dict, + "is_bad": is_bad(score_dict["score"], thresholds.get_threshold(eval_name)), + } + return cast(ThresholdedTrustworthyRAGScore, thresholded_scores) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 855fd6e..e4238a6 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -4,27 +4,20 @@ from __future__ import annotations -from typing import Any, Callable, Optional, cast +from typing import TYPE_CHECKING, Any, Callable, Optional, cast +from cleanlab_tlm import TrustworthyRAG from pydantic import BaseModel, Field, field_validator from cleanlab_codex.internal.validator import ( get_default_evaluations, get_default_trustworthyrag_config, ) -from cleanlab_codex.internal.validator import is_bad_response as _is_bad_response from cleanlab_codex.internal.validator import update_scores_based_on_thresholds as _update_scores_based_on_thresholds from cleanlab_codex.project import Project -from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore -from cleanlab_codex.utils.errors import MissingDependencyError -try: - from cleanlab_tlm import TrustworthyRAG -except ImportError as e: - raise MissingDependencyError( - import_name=e.name or "cleanlab-tlm", - package_url="https://github.com/cleanlab/cleanlab-tlm", - ) from e +if TYPE_CHECKING: + from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore class BadResponseThresholds(BaseModel): @@ -141,8 +134,8 @@ def validate( Returns: dict[str, Any]: A dictionary containing: - - 'is_bad_response': True if the response is flagged as potentially bad, False otherwise. - - 'expert_answer': Alternate SME-provided answer from Codex, or None if no answer could be found in the Codex Project. + - 'expert_answer': Alternate SME-provided answer from Codex if the response was flagged as bad and an answer was found, or None otherwise. + - 'is_bad_response': True if the response is flagged as potentially bad (when True, a lookup in Codex is performed), False otherwise. - Additional keys: Various keys from a [`ThresholdedTrustworthyRAGScore`](/cleanlab_codex/types/validator/#class-thresholdedtrustworthyragscore) dictionary, with raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. `is_bad` indicating whether the score is below the threshold. """ scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt) @@ -164,7 +157,7 @@ def detect( prompt: Optional[str] = None, form_prompt: Optional[Callable[[str, str], str]] = None, ) -> tuple[ThresholdedTrustworthyRAGScore, bool]: - """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response. + """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response via thresholding. Args: query (str): The user query that was used to generate the response. @@ -178,21 +171,21 @@ def detect( - bool: True if the response is determined to be bad based on the evaluation scores and configured thresholds, False otherwise. 
""" - scores = cast( - ThresholdedTrustworthyRAGScore, - self._tlm_rag.score( - response=response, - query=query, - context=context, - prompt=prompt, - form_prompt=form_prompt, - ), + scores = self._tlm_rag.score( + response=response, + query=query, + context=context, + prompt=prompt, + form_prompt=form_prompt, ) - _update_scores_based_on_thresholds(scores, thresholds=self._bad_response_thresholds) + thresholded_scores = _update_scores_based_on_thresholds( + scores=scores, + thresholds=self._bad_response_thresholds, + ) - is_bad_response = _is_bad_response(scores, self._bad_response_thresholds) - return scores, is_bad_response + is_bad_response = any(score_dict["is_bad"] for score_dict in thresholded_scores.values()) + return thresholded_scores, is_bad_response def remediate(self, query: str) -> str | None: """Request a SME-provided answer for this query, if one is available in Codex. diff --git a/tests/internal/test_validator.py b/tests/internal/test_validator.py index a2f3146..b2d059e 100644 --- a/tests/internal/test_validator.py +++ b/tests/internal/test_validator.py @@ -1,9 +1,8 @@ from typing import cast -import pytest from cleanlab_tlm.utils.rag import TrustworthyRAGScore -from cleanlab_codex.internal.validator import get_default_evaluations, is_bad_response +from cleanlab_codex.internal.validator import get_default_evaluations from cleanlab_codex.validator import BadResponseThresholds @@ -28,26 +27,3 @@ def make_is_bad_response_config(trustworthiness: float, response_helpfulness: fl def test_get_default_evaluations() -> None: assert {evaluation.name for evaluation in get_default_evaluations()} == {"response_helpfulness"} - - -class TestIsBadResponse: - @pytest.fixture - def scores(self) -> TrustworthyRAGScore: - return make_scores(0.92, 0.75) - - @pytest.fixture - def custom_is_bad_response_config(self) -> BadResponseThresholds: - return make_is_bad_response_config(0.6, 0.7) - - def test_thresholds(self, scores: TrustworthyRAGScore) -> None: - # High trustworthiness_threshold - is_bad_response_config = make_is_bad_response_config(0.921, 0.5) - assert is_bad_response(scores, is_bad_response_config) - - # High response_helpfulness_threshold - is_bad_response_config = make_is_bad_response_config(0.5, 0.751) - assert is_bad_response(scores, is_bad_response_config) - - def test_scores(self, custom_is_bad_response_config: BadResponseThresholds) -> None: - scores = make_scores(0.59, 0.7) - assert is_bad_response(scores, custom_is_bad_response_config) From be4745ce489ae84e1ba184e2d5d9cc7a78d2f048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Tue, 25 Mar 2025 17:27:51 -0700 Subject: [PATCH 15/27] Update pyproject.toml Co-authored-by: Anish Athalye --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6b36862..ecba729 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "cleanlab-tlm>=1.0.12", + "cleanlab-tlm~=1.0.12", "codex-sdk==0.1.0a12", "pydantic>=2.0.0, <3", ] From 54e866b2aff3fef47c6869230f07449f1fb4b901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Tue, 25 Mar 2025 18:30:05 -0700 Subject: [PATCH 16/27] Update response_validation.py docstring to indicate module deprecation in favor of the new Validator API. 
--- src/cleanlab_codex/response_validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cleanlab_codex/response_validation.py b/src/cleanlab_codex/response_validation.py index e3bf78a..d46a5e0 100644 --- a/src/cleanlab_codex/response_validation.py +++ b/src/cleanlab_codex/response_validation.py @@ -1,4 +1,6 @@ """ +This module is now superseded by this [Validator API](/codex/api/validator/). + Validation functions for evaluating LLM responses and determining if they should be replaced with Codex-generated alternatives. """ From c63262540a2ff0843caaa9489137811ee8388aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Wed, 26 Mar 2025 13:06:53 -0700 Subject: [PATCH 17/27] make remediate method private --- src/cleanlab_codex/validator.py | 4 ++-- tests/test_validator.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index e4238a6..1f8fd93 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -141,7 +141,7 @@ def validate( scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt) expert_answer = None if is_bad_response: - expert_answer = self.remediate(query) + expert_answer = self._remediate(query) return { "expert_answer": expert_answer, @@ -187,7 +187,7 @@ def detect( is_bad_response = any(score_dict["is_bad"] for score_dict in thresholded_scores.values()) return thresholded_scores, is_bad_response - def remediate(self, query: str) -> str | None: + def _remediate(self, query: str) -> str | None: """Request a SME-provided answer for this query, if one is available in Codex. Args: diff --git a/tests/test_validator.py b/tests/test_validator.py index abcff32..cdc2b21 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -133,7 +133,7 @@ def test_remediate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) validator = Validator(codex_access_key="test") - result = validator.remediate("test query") + result = validator._remediate("test query") # Verify project.query was called mock_project.from_access_key.return_value.query.assert_called_once_with(question="test query") From d422bcf9f55448deb7f1999f3acf05265bd785e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Wed, 26 Mar 2025 13:11:14 -0700 Subject: [PATCH 18/27] update docstrings --- src/cleanlab_codex/validator.py | 50 ++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 1f8fd93..e9949a2 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -23,6 +23,11 @@ class BadResponseThresholds(BaseModel): """Config for determining if a response is bad. Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. + + Default Thresholds: + - trustworthiness: 0.5 + - response_helpfulness: 0.5 + - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds) """ trustworthiness: float = Field( @@ -82,15 +87,41 @@ def __init__( trustworthy_rag_config: Optional[dict[str, Any]] = None, bad_response_thresholds: Optional[dict[str, float]] = None, ): - """Evaluates the quality of responses generated in RAG applications and remediates them if needed. 
+ """Real-time detection and remediation of bad responses in RAG applications, powered by Cleanlab's TrustworthyRAG and Codex. - This object combines Cleanlab's various Evals with thresholding to detect bad responses and remediates them with Codex. + This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses + in your RAG application. When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided + answer from your Codex project. + + For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including + both detection and Codex remediation. The `detect()` method is available separately for testing and threshold tuning purposes + without triggering a Codex lookup. + + By default, this uses the same default configurations as [`TrustworthyRAG`](/tlm/api/python/utils.rag/#class-trustworthyrag), except: + - Explanations are returned in logs for better debugging + - Only the `response_helpfulness` eval is run Args: - codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. - tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). - trustworthy_rag_config (Optional[dict[str, Any]]): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), which is used to detect response issues. - bad_response_thresholds (Optional[dict[str, float]]): Detection score thresholds used to flag whether or not a response is considered bad. Each key in this dict corresponds to an Eval from TrustworthyRAG, and the value indicates a threshold below which scores from this Eval are considered detected issues. A response is flagged as bad if any issues are detected for it. + codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers + when bad responses are detected. + + tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified + in trustworthy_rag_config. + + trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), + which is used to detect response issues. If not provided, default configuration will be used. + + bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether + a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value + indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response + is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See + [`BadResponseThresholds`](/codex/api/python/validator/#class-badresponsethresholds) for more details. + + Raises: + ValueError: If both tlm_api_key and api_key in trustworthy_rag_config are provided. + ValueError: If bad_response_thresholds contains thresholds for non-existent evaluation metrics. + TypeError: If any threshold value is not a number. + ValueError: If any threshold value is not between 0 and 1. 
""" trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config() if tlm_api_key is not None and "api_key" in trustworthy_rag_config: @@ -157,7 +188,12 @@ def detect( prompt: Optional[str] = None, form_prompt: Optional[Callable[[str, str], str]] = None, ) -> tuple[ThresholdedTrustworthyRAGScore, bool]: - """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response via thresholding. + """Score response quality using TrustworthyRAG and flag bad responses based on configured thresholds. + + Note: + This method is primarily intended for testing and threshold tuning purposes. For production use cases, + we recommend using the `validate()` method which provides a complete validation workflow including + Codex remediation. Args: query (str): The user query that was used to generate the response. From 2ae9b0f5bb91f5276b18a06258b8537b8e615059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Wed, 26 Mar 2025 17:15:59 -0700 Subject: [PATCH 19/27] Update types/response_validation.py docstring to indicate module deprecation in favor of the new Validator API. --- src/cleanlab_codex/types/response_validation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cleanlab_codex/types/response_validation.py b/src/cleanlab_codex/types/response_validation.py index 1479e2d..2504578 100644 --- a/src/cleanlab_codex/types/response_validation.py +++ b/src/cleanlab_codex/types/response_validation.py @@ -1,4 +1,7 @@ -"""Types for response validation.""" +""" +This module is now superseded by this [Validator API](/codex/api/validator/). + +Types for response validation.""" from abc import ABC, abstractmethod from collections import OrderedDict From 7322026d83c5bc28623eafd087b211a96a9ad2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Wed, 26 Mar 2025 17:16:06 -0700 Subject: [PATCH 20/27] formatting --- src/cleanlab_codex/validator.py | 30 +++++++++++++++--------------- tests/test_validator.py | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index e9949a2..b137aea 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -23,11 +23,11 @@ class BadResponseThresholds(BaseModel): """Config for determining if a response is bad. Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. - + Default Thresholds: - trustworthiness: 0.5 - response_helpfulness: 0.5 - - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds) + - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds) """ trustworthiness: float = Field( @@ -89,11 +89,11 @@ def __init__( ): """Real-time detection and remediation of bad responses in RAG applications, powered by Cleanlab's TrustworthyRAG and Codex. - This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses - in your RAG application. When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided + This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses + in your RAG application. When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided answer from your Codex project. 
- For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including + For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including both detection and Codex remediation. The `detect()` method is available separately for testing and threshold tuning purposes without triggering a Codex lookup. @@ -102,21 +102,21 @@ def __init__( - Only the `response_helpfulness` eval is run Args: - codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers + codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers when bad responses are detected. - tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified + tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified in trustworthy_rag_config. - trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), + trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), which is used to detect response issues. If not provided, default configuration will be used. - bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether - a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value - indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response - is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See + bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether + a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value + indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response + is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See [`BadResponseThresholds`](/codex/api/python/validator/#class-badresponsethresholds) for more details. - + Raises: ValueError: If both tlm_api_key and api_key in trustworthy_rag_config are provided. ValueError: If bad_response_thresholds contains thresholds for non-existent evaluation metrics. @@ -189,10 +189,10 @@ def detect( form_prompt: Optional[Callable[[str, str], str]] = None, ) -> tuple[ThresholdedTrustworthyRAGScore, bool]: """Score response quality using TrustworthyRAG and flag bad responses based on configured thresholds. - + Note: This method is primarily intended for testing and threshold tuning purposes. For production use cases, - we recommend using the `validate()` method which provides a complete validation workflow including + we recommend using the `validate()` method which provides a complete validation workflow including Codex remediation. 
Args: diff --git a/tests/test_validator.py b/tests/test_validator.py index cdc2b21..3193dbf 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -133,7 +133,7 @@ def test_remediate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None mock_project.from_access_key.return_value.query.return_value = ("expert answer", None) validator = Validator(codex_access_key="test") - result = validator._remediate("test query") + result = validator._remediate("test query") # noqa: SLF001 # Verify project.query was called mock_project.from_access_key.return_value.query.assert_called_once_with(question="test query") From 8089c177c3441eaa2e47a39693b367bc5b8232ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Wed, 26 Mar 2025 17:16:48 -0700 Subject: [PATCH 21/27] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17e7ab3..a005c5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Add `Validator` API +- Deprecate `response_validation.py` module. + ## [1.0.4] - 2025-03-14 - Pass analytics metadata in headers for all Codex API requests. From f8aeb52e044781cb62231074c9fbfe3a02e1a92a Mon Sep 17 00:00:00 2001 From: Jonas Mueller <1390638+jwmueller@users.noreply.github.com> Date: Thu, 27 Mar 2025 00:51:41 -0400 Subject: [PATCH 22/27] clarify detect v validate further --- src/cleanlab_codex/validator.py | 55 +++++++++++++++++---------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index b137aea..7b820bd 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -1,5 +1,5 @@ """ -Leverage Cleanlab's Evals and Codex to detect and remediate bad responses in RAG applications. +Detect and remediate bad responses in RAG applications, by integrating Codex as-a-Backup. """ from __future__ import annotations @@ -22,7 +22,7 @@ class BadResponseThresholds(BaseModel): """Config for determining if a response is bad. - Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad. + Each key is an evaluation metric and the value is a threshold such that a response is considered bad whenever the corresponding evaluation score falls below the threshold. Default Thresholds: - trustworthiness: 0.5 @@ -45,11 +45,11 @@ class BadResponseThresholds(BaseModel): @property def default_threshold(self) -> float: - """The default threshold to use when a specific evaluation metric's threshold is not set. This threshold is set to 0.5.""" + """The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.5.""" return 0.5 def get_threshold(self, eval_name: str) -> float: - """Get threshold for an eval if it exists. + """Get threshold for an eval, if it exists. For fields defined in the model, returns their value (which may be the field's default). For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). @@ -90,30 +90,30 @@ def __init__( """Real-time detection and remediation of bad responses in RAG applications, powered by Cleanlab's TrustworthyRAG and Codex. This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses - in your RAG application. 
When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided - answer from your Codex project. - - For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including - both detection and Codex remediation. The `detect()` method is available separately for testing and threshold tuning purposes - without triggering a Codex lookup. - - By default, this uses the same default configurations as [`TrustworthyRAG`](/tlm/api/python/utils.rag/#class-trustworthyrag), except: - - Explanations are returned in logs for better debugging - - Only the `response_helpfulness` eval is run - + in your RAG application. When a bad response is detected, this Validator automatically attempts to remediate by retrieving an expert-provided + answer from the Codex Project you've integrated with your RAG app. If no expert answer is available, + the corresponding query is logged in the Codex Project for SMEs to answer. + + For production, use the `validate()` method which provides a complete validation workflow including both detection and remediation. + A `detect()` method is separately available for you to test/tune detection configurations like score thresholds and TrustworthyRAG settings + without triggering any Codex lookups that otherwise could affect the state of the corresponding Codex Project. + Args: codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers - when bad responses are detected. + when bad responses are detected, or otherwise log the corresponding queries for SMEs to answer. tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified - in trustworthy_rag_config. + in `trustworthy_rag_config`. trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), - which is used to detect response issues. If not provided, default configuration will be used. + which is used to detect response issues. If not provided, a default configuration will be used. + By default, this Validator uses the same default configurations as [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), except: + - Explanations are returned in logs for better debugging + - Only the `response_helpfulness` eval is run bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether - a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value - indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response + a response is bad or not. Each key corresponds to an Eval from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), + and the value indicates a threshold (between 0 and 1) below which Eval scores are treated as detected issues. A response is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See [`BadResponseThresholds`](/codex/api/python/validator/#class-badresponsethresholds) for more details. @@ -156,7 +156,8 @@ def validate( prompt: Optional[str] = None, form_prompt: Optional[Callable[[str, str], str]] = None, ) -> dict[str, Any]: - """Evaluate whether the AI-generated response is bad, and if so, request an alternate expert response. 
+ """Evaluate whether the AI-generated response is bad, and if so, request an alternate expert answer. + If no expert answer is available, this query is still logged for SMEs to answer. Args: query (str): The user query that was used to generate the response. @@ -165,9 +166,9 @@ def validate( Returns: dict[str, Any]: A dictionary containing: - - 'expert_answer': Alternate SME-provided answer from Codex if the response was flagged as bad and an answer was found, or None otherwise. - - 'is_bad_response': True if the response is flagged as potentially bad (when True, a lookup in Codex is performed), False otherwise. - - Additional keys: Various keys from a [`ThresholdedTrustworthyRAGScore`](/cleanlab_codex/types/validator/#class-thresholdedtrustworthyragscore) dictionary, with raw scores from [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) for each evaluation metric. `is_bad` indicating whether the score is below the threshold. + - 'expert_answer': Alternate SME-provided answer from Codex if the response was flagged as bad and an answer was found in the Codex Project, or None otherwise. + - 'is_bad_response': True if the response is flagged as potentially bad, False otherwise. When True, a Codex lookup is performed, which logs this query into the Codex Project for SMEs to answer. + - Additional keys from a [`ThresholdedTrustworthyRAGScore`](/cleanlab_codex/types/validator/#class-thresholdedtrustworthyragscore) dictionary: each corresponds to a [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) evaluation metric, and points to the score for this evaluation as well as a boolean `is_bad` flagging whether the score falls below the corresponding threshold. """ scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt) expert_answer = None @@ -191,9 +192,9 @@ def detect( """Score response quality using TrustworthyRAG and flag bad responses based on configured thresholds. Note: - This method is primarily intended for testing and threshold tuning purposes. For production use cases, - we recommend using the `validate()` method which provides a complete validation workflow including - Codex remediation. + Use this method instead of `validate()` to test/tune detection configurations like score thresholds and TrustworthyRAG settings. + This `detect()` method will not affect your Codex Project, whereas `validate()` will log queries whose response was detected as bad into the Codex Project and is thus only suitable for production, not testing. + Both this method and `validate()` rely on this same detection logic, so you can use this method to first optimize detections and then switch to using `validate()`. Args: query (str): The user query that was used to generate the response. From 0f602e3a570cf26d29009fce0312e9f313ccb442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 27 Mar 2025 00:09:15 -0700 Subject: [PATCH 23/27] add prompt and format_prompt to docstrings --- src/cleanlab_codex/validator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 7b820bd..3552e81 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -163,6 +163,8 @@ def validate( query (str): The user query that was used to generate the response. context (str): The context that was retrieved from the RAG Knowledge Base and used to generate the response. response (str): A reponse from your LLM/RAG system. 
+ prompt (str, optional): Optional prompt representing the actual inputs (combining query, context, and system instructions into one string) to the LLM that generated the response. + form_prompt (Callable[[str, str], str], optional): Optional function to format the prompt based on query and context. Cannot be provided together with prompt, provide one or the other. This function should take query and context as parameters and return a formatted prompt string. If not provided, a default prompt formatter will be used. To include a system prompt or any other special instructions for your LLM, incorporate them directly in your custom form_prompt() function definition. Returns: dict[str, Any]: A dictionary containing: @@ -200,6 +202,8 @@ def detect( query (str): The user query that was used to generate the response. context (str): The context that was retrieved from the RAG Knowledge Base and used to generate the response. response (str): A reponse from your LLM/RAG system. + prompt (str, optional): Optional prompt representing the actual inputs (combining query, context, and system instructions into one string) to the LLM that generated the response. + form_prompt (Callable[[str, str], str], optional): Optional function to format the prompt based on query and context. Cannot be provided together with prompt, provide one or the other. This function should take query and context as parameters and return a formatted prompt string. If not provided, a default prompt formatter will be used. To include a system prompt or any other special instructions for your LLM, incorporate them directly in your custom form_prompt() function definition. Returns: tuple[ThresholdedTrustworthyRAGScore, bool]: A tuple containing: From 76ca4c3b58a38c6ce0aaebd6c1668b1931ebb973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 27 Mar 2025 00:10:53 -0700 Subject: [PATCH 24/27] formatting --- src/cleanlab_codex/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index 3552e81..df95346 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -97,7 +97,7 @@ def __init__( For production, use the `validate()` method which provides a complete validation workflow including both detection and remediation. A `detect()` method is separately available for you to test/tune detection configurations like score thresholds and TrustworthyRAG settings without triggering any Codex lookups that otherwise could affect the state of the corresponding Codex Project. - + Args: codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers when bad responses are detected, or otherwise log the corresponding queries for SMEs to answer. From 3e4d8bb4cbeea99c1807cb81247d48c9582c83fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 27 Mar 2025 09:32:33 -0700 Subject: [PATCH 25/27] deprecated --- src/cleanlab_codex/response_validation.py | 2 +- src/cleanlab_codex/types/response_validation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cleanlab_codex/response_validation.py b/src/cleanlab_codex/response_validation.py index d46a5e0..dca2877 100644 --- a/src/cleanlab_codex/response_validation.py +++ b/src/cleanlab_codex/response_validation.py @@ -1,7 +1,7 @@ """ This module is now superseded by this [Validator API](/codex/api/validator/). 
-Validation functions for evaluating LLM responses and determining if they should be replaced with Codex-generated alternatives. +Deprecated validation functions for evaluating LLM responses and determining if they should be replaced with Codex-generated alternatives. """ from __future__ import annotations diff --git a/src/cleanlab_codex/types/response_validation.py b/src/cleanlab_codex/types/response_validation.py index 2504578..e0e5b26 100644 --- a/src/cleanlab_codex/types/response_validation.py +++ b/src/cleanlab_codex/types/response_validation.py @@ -1,7 +1,7 @@ """ This module is now superseded by this [Validator API](/codex/api/validator/). -Types for response validation.""" +Deprecated types for response validation.""" from abc import ABC, abstractmethod from collections import OrderedDict From ac8762f1c4d1ae08ec5944317cd564f440976ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 27 Mar 2025 13:40:48 -0700 Subject: [PATCH 26/27] Update CHANGELOG for version 1.0.5 --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a005c5e..7e26398 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.0.5] - 2025-03-27 + - Add `Validator` API - Deprecate `response_validation.py` module. @@ -32,7 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release of the `cleanlab-codex` client library. -[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.4...HEAD +[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.5...HEAD +[1.0.5]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.4...v1.0.5 [1.0.4]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.3...v1.0.4 [1.0.3]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.2...v1.0.3 [1.0.2]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.1...v1.0.2 From 84799a5d8edb40997aaef0fa83bff622564e1a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C3=ADas=20Snorrason?= Date: Thu, 27 Mar 2025 13:48:29 -0700 Subject: [PATCH 27/27] Swap order of classes --- src/cleanlab_codex/validator.py | 118 ++++++++++++++++---------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/src/cleanlab_codex/validator.py b/src/cleanlab_codex/validator.py index df95346..81365b3 100644 --- a/src/cleanlab_codex/validator.py +++ b/src/cleanlab_codex/validator.py @@ -20,65 +20,6 @@ from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore -class BadResponseThresholds(BaseModel): - """Config for determining if a response is bad. - Each key is an evaluation metric and the value is a threshold such that a response is considered bad whenever the corresponding evaluation score falls below the threshold. - - Default Thresholds: - - trustworthiness: 0.5 - - response_helpfulness: 0.5 - - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds) - """ - - trustworthiness: float = Field( - description="Threshold for trustworthiness.", - default=0.5, - ge=0.0, - le=1.0, - ) - response_helpfulness: float = Field( - description="Threshold for response helpfulness.", - default=0.5, - ge=0.0, - le=1.0, - ) - - @property - def default_threshold(self) -> float: - """The default threshold to use when an evaluation metric's threshold is not specified. 
This threshold is set to 0.5.""" - return 0.5 - - def get_threshold(self, eval_name: str) -> float: - """Get threshold for an eval, if it exists. - - For fields defined in the model, returns their value (which may be the field's default). - For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). - """ - - # For fields defined in the model, use their value (which may be the field's default) - if eval_name in self.model_fields: - return cast(float, getattr(self, eval_name)) - - # For custom evals, use the default threshold - return getattr(self, eval_name, self.default_threshold) - - @field_validator("*") - @classmethod - def validate_threshold(cls, v: Any) -> float: - """Validate that all fields (including dynamic ones) are floats between 0 and 1.""" - if not isinstance(v, (int, float)): - error_msg = f"Threshold must be a number, got {type(v)}" - raise TypeError(error_msg) - if not 0 <= float(v) <= 1: - error_msg = f"Threshold must be between 0 and 1, got {v}" - raise ValueError(error_msg) - return float(v) - - model_config = { - "extra": "allow" # Allow additional fields for custom eval thresholds - } - - class Validator: def __init__( self, @@ -239,3 +180,62 @@ def _remediate(self, query: str) -> str | None: """ codex_answer, _ = self._project.query(question=query) return codex_answer + + +class BadResponseThresholds(BaseModel): + """Config for determining if a response is bad. + Each key is an evaluation metric and the value is a threshold such that a response is considered bad whenever the corresponding evaluation score falls below the threshold. + + Default Thresholds: + - trustworthiness: 0.5 + - response_helpfulness: 0.5 + - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds) + """ + + trustworthiness: float = Field( + description="Threshold for trustworthiness.", + default=0.5, + ge=0.0, + le=1.0, + ) + response_helpfulness: float = Field( + description="Threshold for response helpfulness.", + default=0.5, + ge=0.0, + le=1.0, + ) + + @property + def default_threshold(self) -> float: + """The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.5.""" + return 0.5 + + def get_threshold(self, eval_name: str) -> float: + """Get threshold for an eval, if it exists. + + For fields defined in the model, returns their value (which may be the field's default). + For custom evals not defined in the model, returns the default threshold value (see `default_threshold`). + """ + + # For fields defined in the model, use their value (which may be the field's default) + if eval_name in self.model_fields: + return cast(float, getattr(self, eval_name)) + + # For custom evals, use the default threshold + return getattr(self, eval_name, self.default_threshold) + + @field_validator("*") + @classmethod + def validate_threshold(cls, v: Any) -> float: + """Validate that all fields (including dynamic ones) are floats between 0 and 1.""" + if not isinstance(v, (int, float)): + error_msg = f"Threshold must be a number, got {type(v)}" + raise TypeError(error_msg) + if not 0 <= float(v) <= 1: + error_msg = f"Threshold must be between 0 and 1, got {v}" + raise ValueError(error_msg) + return float(v) + + model_config = { + "extra": "allow" # Allow additional fields for custom eval thresholds + }
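A minimal usage sketch of the `Validator` API introduced by the patches above. The import path, method names, constructor parameters, and result keys follow the code in this changeset; the access keys, threshold values, and query/context/response strings are illustrative placeholders rather than values taken from these patches.

from cleanlab_codex.validator import Validator

# Placeholder credentials -- substitute your own Codex access key and TLM API key.
validator = Validator(
    codex_access_key="<your-codex-access-key>",
    tlm_api_key="<your-tlm-api-key>",
    # Optional: a response is flagged as bad if any eval score falls below its threshold;
    # evals left unspecified here fall back to the 0.5 default from BadResponseThresholds.
    bad_response_thresholds={"trustworthiness": 0.7, "response_helpfulness": 0.5},
)

# Production path: validate() scores the response and, if it is flagged as bad,
# performs a Codex lookup (logging the query for SMEs when no expert answer exists yet).
result = validator.validate(
    query="How do I change my billing address?",
    context="<context retrieved from your knowledge base>",
    response="<draft response from your RAG system>",
)
if result["is_bad_response"] and result["expert_answer"] is not None:
    final_response = result["expert_answer"]  # serve the SME-provided answer instead
else:
    final_response = "<draft response from your RAG system>"

# Tuning path: detect() returns per-eval scores plus a combined is_bad flag without
# affecting the Codex Project, which makes it suitable for calibrating thresholds.
scores, is_bad = validator.detect(
    query="How do I change my billing address?",
    context="<context retrieved from your knowledge base>",
    response="<draft response from your RAG system>",
)
print(scores["trustworthiness"]["score"], scores["trustworthiness"]["is_bad"], is_bad)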