Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 39 additions & 3 deletions src/relay_detector/protocols/openai/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,25 @@
# HTTP status codes in the retryable set: 429 plus the listed 5xx codes.
# NOTE(review): the retry loop that consults these limits is not visible in
# this section; the _S suffix suggests MAX_BACKOFF_S is in seconds — confirm.
RETRYABLE_STATUS = {429, 500, 502, 503, 504}
MAX_BACKOFF_S = 30.0
MAX_RETRIES = 3

# Legacy plain-prefix list, kept so existing references keep resolving.
# It has no -mini / -nano carve-out; the family tuples below carry that rule.
DEFAULT_TEMPERATURE_ONLY_PREFIXES = ("gpt-5.5",)

# Reasoning-tier model families that REJECT sampling parameters. Sending
# temperature (even temperature=0) returns HTTP 400 "Unsupported value:
# 'temperature' does not support 0 with this model. Only the default (1)
# value is supported." The safe action is to strip the field entirely.
#
# IMPORTANT: -mini / -nano sub-variants of these families are distinct
# (non-reasoning) models that DO accept temperature — they must NOT be
# stripped, otherwise consistency / model_consistency detectors lose
# determinism and start flapping.
#
# Sources (May 2026):
# - https://community.openai.com/t/temperature-in-gpt-5-models/1337133
# - https://github.com/mem0ai/mem0/issues/4738 (gpt-5.4-mini accepts temp)
# - https://github.com/BerriAI/litellm/issues/27351 (gpt-5.1 reasoning_effort=none accepts temp)
_TEMPERATURE_REJECTING_FAMILIES = (
    "gpt-5.5",  # 5.5 / 5.5-pro / 5.5-2026-04-23 (no -mini/-nano variant exists yet)
    "gpt-5.4",  # 5.4 / 5.4-pro — but NOT 5.4-mini / 5.4-nano
)
# Suffixes that, directly after a family name, mark a temperature-accepting model.
_TEMPERATURE_OK_SUB_VARIANTS = ("-mini", "-nano")


def normalize_openai_base_url(base_url: str) -> str:
Expand All @@ -34,9 +50,29 @@ def normalize_openai_base_url(base_url: str) -> str:
return normalized + "/v1"


def _normalize_openai_model_id(model_id: str) -> str:
    """Canonicalize the separators in an OpenAI model id.

    Every ``.`` and ``_`` is rewritten to ``-``, so users typing
    ``gpt-5_4`` or ``gpt-5-4`` land in the same family bucket as
    ``gpt-5.4``. This is meant to be the same dot/underscore→hyphen
    canonicalization used by ``models_match`` in config.py.
    """
    separator_map = str.maketrans({".": "-", "_": "-"})
    return model_id.translate(separator_map)


def _rejects_temperature(model_id: str) -> bool:
    """Return True when *model_id* is in a family that rejects ``temperature``.

    The id and each entry of ``_TEMPERATURE_REJECTING_FAMILIES`` are
    canonicalized with ``_normalize_openai_model_id`` before comparison, so
    ``gpt-5.4``, ``gpt-5_4`` and ``gpt-5-4`` are treated alike.

    A family matches only when its name is the whole id or is followed by a
    non-alphanumeric character (e.g. ``-pro``, ``-2026-04-23``). A bare
    prefix test would also claim unrelated ids such as ``gpt-5.40``.

    Ids whose remainder starts with one of ``_TEMPERATURE_OK_SUB_VARIANTS``
    (``-mini`` / ``-nano``) accept temperature and yield False.
    """
    normalized = _normalize_openai_model_id(model_id)
    ok_suffixes = tuple(_TEMPERATURE_OK_SUB_VARIANTS)
    for family in _TEMPERATURE_REJECTING_FAMILIES:
        prefix = _normalize_openai_model_id(family)
        if not normalized.startswith(prefix):
            continue
        tail = normalized[len(prefix):]
        # Require a real name boundary: "gpt-5-40" must not count as "gpt-5-4".
        if tail and tail[0].isalnum():
            continue
        # The first family that matches decides the answer.
        return not tail.startswith(ok_suffixes)
    return False


def _sanitize_body(body: dict[str, Any]) -> dict[str, Any]:
    """Remove ``temperature`` from *body* when its model rejects the field.

    The decision is delegated to ``_rejects_temperature`` so that -mini /
    -nano sub-variants keep their temperature; a plain prefix test on the
    model name would strip it from them as well.

    Args:
        body: Request payload; its ``"model"`` entry is inspected when it is
            a string. Any other value leaves the payload untouched.

    Returns:
        The same dict object that was passed in, modified in place.
    """
    model = body.get("model")
    if isinstance(model, str) and _rejects_temperature(model):
        # pop with a default: a payload without temperature is already fine.
        body.pop("temperature", None)
    return body

Expand Down
42 changes: 38 additions & 4 deletions tests/test_openai_phase2.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,39 @@ def handler(request: httpx.Request) -> httpx.Response:
assert "temperature" not in captured["body"]


@pytest.mark.parametrize(
"model,expect_stripped",
[
# Reasoning-tier models that reject temperature (HTTP 400 from OpenAI)
("gpt-5.5", True),
("gpt-5.5-pro", True),
("gpt-5.5-2026-04-23", True),
("gpt-5.4", True),
("gpt-5.4-pro", True),
# Sub-variants of reasoning families ARE distinct (non-reasoning)
# models that accept temperature — must NOT be stripped
("gpt-5.4-mini", False),
("gpt-5.4-nano", False),
# Other GPT-5 lines accept temperature (5.1 with reasoning_effort=none)
("gpt-5.1", False),
("gpt-5.1-mini", False),
# Legacy / non-reasoning families — never stripped
("gpt-4o", False),
("gpt-4o-mini", False),
# Dot/hyphen/underscore canonicalization: same family bucket
("gpt-5-4", True),
("gpt-5_4", True),
("gpt-5-4-mini", False),
],
)
@pytest.mark.asyncio
async def test_openai_client_keeps_temperature_for_other_models():
async def test_openai_client_temperature_strip_per_model(model: str, expect_stripped: bool):
captured: dict[str, Any] = {}

def handler(request: httpx.Request) -> httpx.Response:
import json as _json
captured["body"] = _json.loads(request.content)
return httpx.Response(200, json=_chat_payload(model="gpt-5.4"))
return httpx.Response(200, json=_chat_payload(model=model))

transport = httpx.MockTransport(handler)
client = OpenAIChatClient("https://api.openai.com", "sk-test")
Expand All @@ -211,14 +236,23 @@ def handler(request: httpx.Request) -> httpx.Response:
)
try:
await client.chat_completions_create(
model="gpt-5.4",
model=model,
temperature=0,
messages=[{"role": "user", "content": "hi"}],
)
finally:
await client.aclose()

assert captured["body"]["temperature"] == 0
if expect_stripped:
assert "temperature" not in captured["body"], (
f"{model} is a reasoning-tier model that rejects temperature — "
"client must strip it before sending"
)
else:
assert captured["body"].get("temperature") == 0, (
f"{model} accepts temperature — client must NOT strip it "
"(stripping would lose detector determinism)"
)


def test_openai_detectors_use_core_base_classes():
Expand Down
Loading