1 | 1 | from __future__ import annotations |
2 | 2 |
3 | | -from typing import TYPE_CHECKING, Any, Optional, Sequence, cast |
4 | 3 |
5 | | -from cleanlab_tlm.utils.rag import Eval, TrustworthyRAGScore, get_default_evals |
6 | | - |
7 | | -from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore |
8 | | - |
9 | | -if TYPE_CHECKING: |
10 | | - from cleanlab_codex.validator import BadResponseThresholds |
11 | | - |
12 | | - |
13 | | -"""Evaluation metrics (excluding trustworthiness) that are used to determine if a response is bad.""" |
14 | | -DEFAULT_EVAL_METRICS = ["response_helpfulness"] |
15 | | - |
16 | | -# Simple mappings for is_bad keys |
17 | | -_SCORE_TO_IS_BAD_KEY = { |
18 | | - "trustworthiness": "is_not_trustworthy", |
19 | | - "query_ease": "is_not_query_easy", |
20 | | - "response_helpfulness": "is_not_response_helpful", |
21 | | - "context_sufficiency": "is_not_context_sufficient", |
22 | | -} |
23 | | - |
24 | | - |
25 | | -def get_default_evaluations() -> list[Eval]: |
26 | | - """Get the default evaluations for the TrustworthyRAG. |
27 | | -
28 | | - Note: |
29 | | - This excludes trustworthiness, which is automatically computed by TrustworthyRAG. |
30 | | - """ |
31 | | - return [evaluation for evaluation in get_default_evals() if evaluation.name in DEFAULT_EVAL_METRICS] |
32 | | - |
33 | | - |
34 | | -def get_default_trustworthyrag_config() -> dict[str, Any]: |
35 | | - """Get the default configuration for the TrustworthyRAG.""" |
36 | | - return { |
37 | | - "options": { |
38 | | - "log": ["explanation"], |
39 | | - }, |
40 | | - } |
41 | | - |
42 | | - |
43 | | -def update_scores_based_on_thresholds( |
44 | | - scores: TrustworthyRAGScore | Sequence[TrustworthyRAGScore], thresholds: BadResponseThresholds |
45 | | -) -> ThresholdedTrustworthyRAGScore: |
46 | | - """Adds a `is_bad` flag to the scores dictionaries based on the thresholds.""" |
47 | | - |
48 | | - # Helper function to check if a score is bad |
49 | | - def is_bad(score: Optional[float], threshold: float) -> bool: |
50 | | - return score is not None and score < threshold |
51 | | - |
52 | | - if isinstance(scores, Sequence): |
53 | | - raise NotImplementedError("Batching is not supported yet.") |
54 | | - |
55 | | - thresholded_scores = {} |
56 | | - for eval_name, score_dict in scores.items(): |
57 | | - thresholded_scores[eval_name] = { |
58 | | - **score_dict, |
59 | | - "is_bad": is_bad(score_dict["score"], thresholds.get_threshold(eval_name)), |
60 | | - } |
61 | | - return cast(ThresholdedTrustworthyRAGScore, thresholded_scores) |
62 | | - |
63 | | - |
64 | | -def process_score_metadata(scores: ThresholdedTrustworthyRAGScore, thresholds: BadResponseThresholds) -> dict[str, Any]: |
65 | | - """Process scores into metadata format with standardized keys. |
| 4 | +def validate_thresholds(thresholds: dict[str, float]) -> None: |
| 5 | + """Validate that all threshold values are between 0 and 1. |
66 | 6 |
67 | 7 | Args: |
68 | | - scores: The ThresholdedTrustworthyRAGScore containing evaluation results |
69 | | - thresholds: The BadResponseThresholds configuration |
| 8 | + thresholds: Dictionary mapping eval names to their threshold values. |
70 | 9 |
71 | | - Returns: |
72 | | - dict: A dictionary containing evaluation scores and their corresponding metadata |
| 10 | + Raises: |
| 11 | + TypeError: If any threshold value is not a number. |
| 12 | + ValueError: If any threshold value is not between 0 and 1. |
73 | 13 | """ |
74 | | - metadata: dict[str, Any] = {} |
75 | | - |
76 | | - # Process scores and add to metadata |
77 | | - for metric, score_data in scores.items(): |
78 | | - metadata[metric] = score_data["score"] |
79 | | - |
80 | | - # Add is_bad flags with standardized naming |
81 | | - is_bad_key = _SCORE_TO_IS_BAD_KEY.get(metric, f"is_not_{metric}") |
82 | | - metadata[is_bad_key] = score_data["is_bad"] |
83 | | - |
84 | | - # Special case for trustworthiness explanation |
85 | | - if metric == "trustworthiness" and "log" in score_data and "explanation" in score_data["log"]: |
86 | | - metadata["explanation"] = score_data["log"]["explanation"] |
87 | | - |
88 | | - # Add thresholds to metadata |
89 | | - thresholds_dict = thresholds.model_dump() |
90 | | - for metric in {k for k in scores if k not in thresholds_dict}: |
91 | | - thresholds_dict[metric] = thresholds.get_threshold(metric) |
92 | | - metadata["thresholds"] = thresholds_dict |
93 | | - |
94 | | - # TODO: Remove this as the backend can infer this from the is_bad flags |
95 | | - metadata["label"] = _get_label(metadata) |
96 | | - |
97 | | - return metadata |
98 | | - |
99 | | - |
100 | | -def _get_label(metadata: dict[str, Any]) -> str: |
101 | | - def is_bad(metric: str) -> bool: |
102 | | - return bool(metadata.get(_SCORE_TO_IS_BAD_KEY[metric], False)) |
103 | | - |
104 | | - if is_bad("context_sufficiency"): |
105 | | - return "search_failure" |
106 | | - if is_bad("response_helpfulness") or is_bad("query_ease"): |
107 | | - return "unhelpful" |
108 | | - if is_bad("trustworthiness"): |
109 | | - return "hallucination" |
110 | | - return "other_issues" |
| 14 | + for eval_name, threshold in thresholds.items(): |
| 15 | + if not isinstance(threshold, (int, float)): |
| 16 | + error_msg = f"Threshold for {eval_name} must be a number, got {type(threshold)}" |
| 17 | + raise TypeError(error_msg) |
| 18 | + if not 0 <= float(threshold) <= 1: |
| 19 | + error_msg = f"Threshold for {eval_name} must be between 0 and 1, got {threshold}" |
| 20 | + raise ValueError(error_msg) |
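
For reviewers, here is a minimal usage sketch of the new `validate_thresholds` helper added above. The import path and the example threshold values are assumptions for illustration only; they are not taken from this PR.

```python
# Minimal sketch of calling validate_thresholds; the import path below is an
# assumption for illustration and may differ from where the helper actually lives.
from cleanlab_codex.internal.validator import validate_thresholds

# Well-formed thresholds: every value is a number in [0, 1], so this returns None.
validate_thresholds({"trustworthiness": 0.7, "response_helpfulness": 0.23})

# A non-numeric value triggers the TypeError branch.
try:
    validate_thresholds({"trustworthiness": "high"})
except TypeError as err:
    print(err)  # Threshold for trustworthiness must be a number, got <class 'str'>

# An out-of-range value triggers the ValueError branch.
try:
    validate_thresholds({"trustworthiness": 1.5})
except ValueError as err:
    print(err)  # Threshold for trustworthiness must be between 0 and 1, got 1.5
```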