7 changes: 6 additions & 1 deletion CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

+## [1.0.31] 2025-10-14
+
+- Add `expert_guardrail_override_explanation` and `log_id` to `ProjectValidateResponse` docstring
+
## [1.0.30] 2025-10-01

- Update API reference language from Codex -> Cleanlab AI Platform
@@ -141,7 +145,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release of the `cleanlab-codex` client library.

-[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.30...HEAD
+[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.31...HEAD
+[1.0.31]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.30...v1.0.31
[1.0.30]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.29...v1.0.30
[1.0.29]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.28...v1.0.29
[1.0.28]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.27...v1.0.28
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
]
dependencies = [
"cleanlab-tlm~=1.1,>=1.1.14",
"codex-sdk==0.1.0a28",
"codex-sdk==0.1.0a30",
"pydantic>=2.0.0, <3",
]

2 changes: 1 addition & 1 deletion src/cleanlab_codex/__about__.py
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: MIT
__version__ = "1.0.30"
__version__ = "1.0.31"
8 changes: 7 additions & 1 deletion src/cleanlab_codex/project.py
@@ -18,7 +18,11 @@

from codex import Codex as _Codex
from codex.types.project_validate_response import ProjectValidateResponse
-from openai.types.chat import ChatCompletion, ChatCompletionMessageParam, ChatCompletionToolParam
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionMessageParam,
+    ChatCompletionToolParam,
+)


_ERROR_CREATE_ACCESS_KEY = (
@@ -187,6 +191,8 @@ def validate(
- escalated_to_sme (bool): True if the query should be escalated to SME for review. When True, the query is logged and may be answered by an expert.
- eval_scores (dict[str, ThresholdedEvalScore]): Evaluation scores for different response attributes (e.g., trustworthiness, helpfulness, ...). Each includes a numeric score and a `failed` flag indicating whether the score falls below threshold.
- expert_answer (str | None): If it was auto-determined that this query should be escalated to SME, and a prior SME answer for a similar query was found, then this will return that expert answer. Otherwise, it is None.
+- expert_guardrail_override_explanation (str | None): If the final guardrail decision was overridden by expert review, this will contain an explanation for why the guardrail result was overridden. Otherwise, it is None.
+- log_id (str): The ID of the log created for this query.

When available, consider swapping your AI response with the expert answer before serving the response to your user.
"""
4 changes: 2 additions & 2 deletions tests/test_project.py
@@ -32,8 +32,8 @@ def test_project_validate_with_dict_response(
openai_messages_conversational: list["ChatCompletionMessageParam"],
) -> None:
expected_result = ProjectValidateResponse(
-is_bad_response=True,
expert_answer=None,
+expert_guardrail_override_explanation=None,
eval_scores={
"response_helpfulness": EvalScores(
score=0.8,
@@ -125,7 +125,7 @@ def test_project_validate_with_tools(
openai_tools: list["ChatCompletionToolParam"],
) -> None:
expected_result = ProjectValidateResponse(
-is_bad_response=True,
+expert_guardrail_override_explanation=None,
expert_answer=None,
eval_scores={
"response_helpfulness": EvalScores(