Remove support for deprecated entries data model (#88)

LukeMainwaring · web-flow · commit bdedb48478af · 2025-06-17T07:28:18.000-04:00
* Remove support for deprecated entries data model

* re-trigger ci
diff --git a/src/cleanlab_codex/codex_tool.py b/src/cleanlab_codex/codex_tool.py
@@ -6,7 +6,6 @@
 
 from typing_extensions import Annotated
 
-from cleanlab_codex.internal.analytics import IntegrationType, _AnalyticsMetadata
 from cleanlab_codex.project import Project
 from cleanlab_codex.utils.errors import MissingDependencyError
 from cleanlab_codex.utils.function import (
@@ -111,11 +110,16 @@ def query(
         Returns:
             The answer to the question if available. If no answer is available, this returns a fallback answer or None.
         """
-        return self._project.query(
-            question=question,
-            fallback_answer=self._fallback_answer,
-            _analytics_metadata=_AnalyticsMetadata(integration_type=IntegrationType.TOOL),
-        )[0]
+        # TODO: CodexTool and the related client docs will be removed in a follow-up PR. Until then, this temporarily routes through Project.validate() (with empty context/prompt) to avoid throwing errors.
+        return (
+            self._project.validate(
+                query=question,
+                response=self._fallback_answer or "",
+                context="",
+                prompt="",
+            ).expert_answer
+            or self._fallback_answer
+        )
 
     def to_openai_tool(self) -> dict[str, Any]:
         """Converts the tool to the expected format for an [OpenAI function tool](https://platform.openai.com/docs/guides/function-calling).
diff --git a/src/cleanlab_codex/project.py b/src/cleanlab_codex/project.py
@@ -2,16 +2,14 @@
 
 from __future__ import annotations
 
-import warnings
 from datetime import datetime
 from typing import TYPE_CHECKING as _TYPE_CHECKING
-from typing import Any, Dict, List, Literal, Optional
+from typing import Dict, List, Literal, Optional
 
 from codex import AuthenticationError
 
-from cleanlab_codex.internal.analytics import IntegrationType, _AnalyticsMetadata
+from cleanlab_codex.internal.analytics import _AnalyticsMetadata
 from cleanlab_codex.internal.sdk_client import client_from_access_key
-from cleanlab_codex.types.entry import Entry
 from cleanlab_codex.types.project import ProjectConfig
 
 if _TYPE_CHECKING:
@@ -21,20 +19,13 @@
     from codex.types.project_validate_params import Options as ProjectValidateOptions
     from codex.types.project_validate_response import ProjectValidateResponse
 
-    from cleanlab_codex.types.entry import EntryCreate
 
 _ERROR_CREATE_ACCESS_KEY = (
     "Failed to create access key. Please ensure you have the necessary permissions "
     "and are using a user-level API key, not a project access key. "
     "See cleanlab_codex.Client.get_project."
 )
 
-_ERROR_ADD_ENTRIES = (
-    "Failed to add entries. Please ensure you have the necessary permissions "
-    "and are using a user-level API key, not a project access key. "
-    "See cleanlab_codex.Client.get_project."
-)
-
 
 class MissingProjectError(Exception):
     """Raised when the project ID or access key does not match any existing project."""
@@ -47,7 +38,6 @@ class Project:
     """Represents a Codex project.
 
     To integrate a Codex project into your RAG/Agentic system, we recommend using one of our abstractions such as [`CodexTool`](/codex/api/python/codex_tool).
-    The [`query`](#method-query) method can also be used directly if none of our existing abstractions are sufficient for your use case.
     """
 
     def __init__(self, sdk_client: _Codex, project_id: str, *, verify_existence: bool = True):
@@ -154,90 +144,6 @@ def create_access_key(
         except AuthenticationError as e:
             raise AuthenticationError(_ERROR_CREATE_ACCESS_KEY, response=e.response, body=e.body) from e
 
-    def add_entries(self, entries: list[EntryCreate]) -> None:
-        """[DEPRECATED] Add a list of entries to this Codex project. Must be authenticated with a user-level API key to use this method.
-        See [`Client.create_project()`](/codex/api/python/client#method-create_project) or [`Client.get_project()`](/codex/api/python/client#method-get_project).
-
-        Args:
-            entries (list[EntryCreate]): The entries to add to this project. See [`EntryCreate`](/codex/api/python/types.entry#class-entrycreate).
-
-        Raises:
-            AuthenticationError: If the Project was created from a project-level access key instead of a [Client instance](/codex/api/python/client#class-client).
-        """
-        warnings.warn(
-            "Project.add_entries() is deprecated and will be removed in a future release. ",
-            FutureWarning,
-            stacklevel=2,
-        )
-        try:
-            # TODO: implement batch creation of entries in backend and update this function
-            for entry in entries:
-                self._sdk_client.projects.entries.create(
-                    self.id,
-                    question=entry["question"],
-                    answer=entry.get("answer"),
-                    extra_headers=_AnalyticsMetadata().to_headers(),
-                )
-        except AuthenticationError as e:
-            raise AuthenticationError(_ERROR_ADD_ENTRIES, response=e.response, body=e.body) from e
-
-    def query(
-        self,
-        question: str,
-        *,
-        fallback_answer: Optional[str] = None,
-        metadata: Optional[dict[str, Any]] = None,
-        _analytics_metadata: Optional[_AnalyticsMetadata] = None,
-    ) -> tuple[Optional[str], Entry]:
-        """[DEPRECATED] Query Codex to check if this project contains an answer to the question. If the question is not yet in the project, it will be added for SME review.
-
-        Args:
-            question (str): The question to ask the Codex API.
-            fallback_answer (str, optional): Optional fallback answer to return if Codex is unable to answer the question.
-            metadata (dict, optional): Additional custom metadata to associate with the query.
-
-        Returns:
-            tuple[Optional[str], Entry]: A tuple representing the answer for the query and the existing or new entry in the Codex project.
-                If Codex is able to answer the question, the first element will be the answer returned by Codex and the second element will be the existing [`Entry`](/codex/api/python/types.entry#class-entry) in the Codex project.
-                If Codex is unable to answer the question, the first element will be `fallback_answer` if provided, otherwise None. The second element will be a new [`Entry`](/codex/api/python/types.entry#class-entry) in the Codex project.
-        """
-        warnings.warn(
-            "Project.query() is deprecated and will be removed in a future release. Use the Project.validate() function instead.",
-            FutureWarning,
-            stacklevel=2,
-        )
-        if not _analytics_metadata:
-            _analytics_metadata = _AnalyticsMetadata(integration_type=IntegrationType.BACKUP)
-
-        return self._query_project(
-            question=question,
-            fallback_answer=fallback_answer,
-            client_metadata=metadata,
-            analytics_metadata=_analytics_metadata,
-        )
-
-    def _query_project(
-        self,
-        question: str,
-        *,
-        fallback_answer: Optional[str] = None,
-        client_metadata: Optional[dict[str, Any]] = None,
-        analytics_metadata: Optional[_AnalyticsMetadata] = None,
-    ) -> tuple[Optional[str], Entry]:
-        extra_headers = analytics_metadata.to_headers() if analytics_metadata else None
-        query_res = self._sdk_client.projects.entries.query(
-            self._id,
-            question=question,
-            client_metadata=client_metadata,
-            extra_headers=extra_headers,
-        )
-
-        entry = Entry.model_validate(query_res.entry.model_dump())
-        if query_res.answer is not None:
-            return query_res.answer, entry
-
-        return fallback_answer, entry
-
     def validate(
         self,
         context: str,
diff --git a/src/cleanlab_codex/types/entry.py b/src/cleanlab_codex/types/entry.py
diff --git a/src/cleanlab_codex/types/project.py b/src/cleanlab_codex/types/project.py
@@ -16,7 +16,7 @@ class ProjectConfig(Config): ...
 
     #### <kbd>property</kbd> max_distance
 
-    Distance threshold used to determine if two questions are similar when querying existing Entries in a project.
+    Distance threshold used to determine if two questions in a project are similar.
     The metric used is cosine distance. Valid threshold values range from 0 (identical vectors) to 1 (orthogonal vectors).
     While cosine distance can extend to 2 (opposite vectors), we limit this value to 1 since finding matches that are less similar than "unrelated" (orthogonal)
     content would not improve results of the system querying the Codex project.
diff --git a/tests/test_codex_tool.py b/tests/test_codex_tool.py
@@ -2,35 +2,16 @@
 import importlib
 import sys
 from typing import Any
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 from langchain_core.tools.structured import StructuredTool
 from llama_index.core.tools import FunctionTool
 
 from cleanlab_codex.codex_tool import CodexTool
-from cleanlab_codex.internal.analytics import IntegrationType, _AnalyticsMetadata
 from cleanlab_codex.utils.errors import MissingDependencyError
 
 
-def test_tool_query_passes_metadata(mock_client_from_access_key: MagicMock) -> None:  # noqa: ARG001
-    mock_project = MagicMock()
-    mock_project.query.return_value = (None, None)
-
-    with patch("cleanlab_codex.codex_tool.Project.from_access_key", return_value=mock_project):
-        tool = CodexTool.from_access_key("sk-test-123")
-        tool.query("what is the capital of France?")
-
-        assert mock_project.query.call_count == 1
-        args, kwargs = mock_project.query.call_args
-        assert kwargs["question"] == "what is the capital of France?"
-        assert kwargs["fallback_answer"] == CodexTool.DEFAULT_FALLBACK_ANSWER
-        assert (
-            kwargs["_analytics_metadata"].to_headers()
-            == _AnalyticsMetadata(integration_type=IntegrationType.TOOL).to_headers()
-        )
-
-
 def patch_import_with_import_error(missing_module: str) -> None:
     def custom_import(name: str, *args: Any, **kwargs: Any) -> Any:
         if name.startswith(missing_module):
diff --git a/tests/test_project.py b/tests/test_project.py
@@ -7,17 +7,8 @@
 from codex.types.projects.access_key_retrieve_project_id_response import (
     AccessKeyRetrieveProjectIDResponse,
 )
-from codex.types.projects.entry_query_response import (
-    Entry as SDKEntry,
-)
-from codex.types.projects.entry_query_response import (
-    EntryManagedMetadata,
-    EntryManagedMetadataTrustworthiness,
-    EntryQueryResponse,
-)
 
 from cleanlab_codex.project import MissingProjectError, Project
-from cleanlab_codex.types.entry import EntryCreate
 
 FAKE_PROJECT_ID = str(uuid.uuid4())
 FAKE_USER_ID = "Test User"
@@ -71,44 +62,6 @@ def test_create_project(mock_client_from_api_key: MagicMock, default_headers: di
     assert mock_client_from_api_key.projects.retrieve.call_count == 0
 
 
-def test_add_entries(mock_client_from_api_key: MagicMock) -> None:
-    answered_entry_create = EntryCreate(
-        question="What is the capital of France?",
-        answer="Paris",
-    )
-    unanswered_entry_create = EntryCreate(
-        question="What is the capital of Germany?",
-    )
-    project = Project(mock_client_from_api_key, FAKE_PROJECT_ID)
-    project.add_entries([answered_entry_create, unanswered_entry_create])
-
-    for call, entry in zip(
-        mock_client_from_api_key.projects.entries.create.call_args_list,
-        [answered_entry_create, unanswered_entry_create],
-    ):
-        assert call.args[0] == FAKE_PROJECT_ID
-        assert call.kwargs["question"] == entry["question"]
-        assert call.kwargs["answer"] == entry.get("answer")
-
-
-def test_add_entries_no_access_key(mock_client_from_access_key: MagicMock) -> None:
-    mock_error = Mock(response=Mock(status=401), body={"error": "Unauthorized"})
-
-    mock_client_from_access_key.projects.entries.create.side_effect = AuthenticationError(
-        "test", response=mock_error.response, body=mock_error.body
-    )
-
-    answered_entry_create = EntryCreate(
-        question="What is the capital of France?",
-        answer="Paris",
-    )
-
-    project = Project.from_access_key(DUMMY_ACCESS_KEY)
-
-    with pytest.raises(AuthenticationError, match="See cleanlab_codex.Client.get_project"):
-        project.add_entries([answered_entry_create])
-
-
 def test_create_access_key(mock_client_from_api_key: MagicMock, default_headers: dict[str, str]) -> None:
     project = Project(mock_client_from_api_key, FAKE_PROJECT_ID)
     access_key_name = "Test Access Key"
@@ -144,83 +97,3 @@ def test_init_nonexistent_project_id(mock_client_from_access_key: MagicMock) ->
     with pytest.raises(MissingProjectError):
         Project(mock_client_from_access_key, FAKE_PROJECT_ID)
     assert mock_client_from_access_key.projects.retrieve.call_count == 1
-
-
-def test_query_question_found_fallback_answer(
-    mock_client_from_access_key: MagicMock,
-) -> None:
-    unanswered_entry = SDKEntry(
-        id=str(uuid.uuid4()),
-        question="What is the capital of France?",
-        answer=None,
-        managed_metadata=EntryManagedMetadata(trustworthiness=EntryManagedMetadataTrustworthiness(scores=[0.95])),
-    )
-
-    mock_client_from_access_key.projects.entries.query.return_value = EntryQueryResponse(
-        entry=unanswered_entry, answer=None
-    )
-    project = Project(mock_client_from_access_key, FAKE_PROJECT_ID)
-    res = project.query("What is the capital of France?")
-    assert res[0] is None
-    assert res[1] is not None
-    assert res[1].model_dump() == unanswered_entry.model_dump()
-
-
-def test_query_question_not_found_fallback_answer(
-    mock_client_from_access_key: MagicMock,
-) -> None:
-    mock_entry = SDKEntry(
-        id="fake-id",
-        question="What is the capital of France?",
-        answer=None,
-        managed_metadata=EntryManagedMetadata(trustworthiness=EntryManagedMetadataTrustworthiness(scores=[0.95])),
-    )
-    mock_client_from_access_key.projects.entries.query.return_value = EntryQueryResponse(entry=mock_entry, answer=None)
-
-    project = Project(mock_client_from_access_key, FAKE_PROJECT_ID)
-    res = project.query("What is the capital of France?", fallback_answer="Paris")
-    assert res[0] == "Paris"
-    assert res[1] is not None
-    assert res[1].model_dump() == mock_entry.model_dump()
-
-
-def test_query_answer_found(mock_client_from_access_key: MagicMock) -> None:
-    answered_entry = SDKEntry(
-        id=str(uuid.uuid4()),
-        question="What is the capital of France?",
-        answer="Paris",
-        managed_metadata=EntryManagedMetadata(trustworthiness=EntryManagedMetadataTrustworthiness(scores=[0.95])),
-    )
-    mock_client_from_access_key.projects.entries.query.return_value = EntryQueryResponse(
-        answer="Paris", entry=answered_entry
-    )
-    project = Project(mock_client_from_access_key, FAKE_PROJECT_ID)
-    res = project.query("What is the capital of France?")
-    assert res[0] == answered_entry.answer
-    assert res[1] is not None
-    assert res[1].model_dump() == answered_entry.model_dump()
-
-
-def test_query_answer_found_with_metadata(mock_client_from_access_key: MagicMock) -> None:
-    answered_entry = SDKEntry(
-        id=str(uuid.uuid4()),
-        question="What is the capital of France?",
-        answer="Paris",
-        client_query_metadata=[{"trustworthiness_score": 0.95}],
-        managed_metadata=EntryManagedMetadata(trustworthiness=EntryManagedMetadataTrustworthiness(scores=[0.95])),
-    )
-    mock_client_from_access_key.projects.entries.query.return_value = EntryQueryResponse(
-        answer="Paris", entry=answered_entry
-    )
-    project = Project(mock_client_from_access_key, FAKE_PROJECT_ID)
-    res = project.query("What is the capital of France?", metadata={"trustworthiness_score": 0.95})
-    assert res[0] == answered_entry.answer
-    assert res[1] is not None
-    assert res[1].model_dump() == answered_entry.model_dump()  # metadata should be included in the entry
-
-
-def test_add_entries_empty_list(mock_client_from_access_key: MagicMock) -> None:
-    """Test adding an empty list of entries"""
-    project = Project(mock_client_from_access_key, FAKE_PROJECT_ID)
-    project.add_entries([])
-    mock_client_from_access_key.projects.entries.create.assert_not_called()