diff --git a/src/relay_detector/protocols/openai/client.py b/src/relay_detector/protocols/openai/client.py index e8a8d47..38c4228 100644 --- a/src/relay_detector/protocols/openai/client.py +++ b/src/relay_detector/protocols/openai/client.py @@ -22,9 +22,25 @@ RETRYABLE_STATUS = {429, 500, 502, 503, 504} MAX_BACKOFF_S = 30.0 MAX_RETRIES = 3 -DEFAULT_TEMPERATURE_ONLY_PREFIXES = ( - "gpt-5.5", +# Reasoning-tier model families that REJECT sampling parameters. Sending +# temperature (even temperature=0) returns HTTP 400 "Unsupported value: +# 'temperature' does not support 0 with this model. Only the default (1) +# value is supported." The safe action is to strip the field entirely. +# +# IMPORTANT: -mini / -nano sub-variants of these families are distinct +# (non-reasoning) models that DO accept temperature — they must NOT be +# stripped, otherwise consistency / model_consistency detectors lose +# determinism and start flapping. +# +# Sources (May 2026): +# - https://community.openai.com/t/temperature-in-gpt-5-models/1337133 +# - https://github.com/mem0ai/mem0/issues/4738 (gpt-5.4-mini accepts temp) +# - https://github.com/BerriAI/litellm/issues/27351 (gpt-5.1 reasoning_effort=none accepts temp) +_TEMPERATURE_REJECTING_FAMILIES = ( + "gpt-5.5", # 5.5 / 5.5-pro / 5.5-2026-04-23 (no -mini/-nano variant exists yet) + "gpt-5.4", # 5.4 / 5.4-pro — but NOT 5.4-mini / 5.4-nano ) +_TEMPERATURE_OK_SUB_VARIANTS = ("-mini", "-nano") def normalize_openai_base_url(base_url: str) -> str: @@ -34,9 +50,29 @@ def normalize_openai_base_url(base_url: str) -> str: return normalized + "/v1" +def _normalize_openai_model_id(model_id: str) -> str: + """Same dot/underscore→hyphen canonicalization used by models_match in + config.py, so users typing `gpt-5_4` or `gpt-5-4` map to the same + family bucket as `gpt-5.4`.""" + return model_id.replace(".", "-").replace("_", "-") + + +def _rejects_temperature(model_id: str) -> bool: + normalized = _normalize_openai_model_id(model_id) + for family in _TEMPERATURE_REJECTING_FAMILIES: + nf = _normalize_openai_model_id(family) + if not normalized.startswith(nf): + continue + tail = normalized[len(nf):] + if any(tail.startswith(suf) for suf in _TEMPERATURE_OK_SUB_VARIANTS): + return False + return True + return False + + def _sanitize_body(body: dict[str, Any]) -> dict[str, Any]: model = body.get("model") - if isinstance(model, str) and model.startswith(DEFAULT_TEMPERATURE_ONLY_PREFIXES): + if isinstance(model, str) and _rejects_temperature(model): body.pop("temperature", None) return body diff --git a/tests/test_openai_phase2.py b/tests/test_openai_phase2.py index 937f8b8..4c8d248 100644 --- a/tests/test_openai_phase2.py +++ b/tests/test_openai_phase2.py @@ -193,14 +193,39 @@ def handler(request: httpx.Request) -> httpx.Response: assert "temperature" not in captured["body"] +@pytest.mark.parametrize( + "model,expect_stripped", + [ + # Reasoning-tier models that reject temperature (HTTP 400 from OpenAI) + ("gpt-5.5", True), + ("gpt-5.5-pro", True), + ("gpt-5.5-2026-04-23", True), + ("gpt-5.4", True), + ("gpt-5.4-pro", True), + # Sub-variants of reasoning families ARE distinct (non-reasoning) + # models that accept temperature — must NOT be stripped + ("gpt-5.4-mini", False), + ("gpt-5.4-nano", False), + # Other GPT-5 lines accept temperature (5.1 with reasoning_effort=none) + ("gpt-5.1", False), + ("gpt-5.1-mini", False), + # Legacy / non-reasoning families — never stripped + ("gpt-4o", False), + ("gpt-4o-mini", False), + # Dot/hyphen/underscore canonicalization: same family bucket + ("gpt-5-4", True), + ("gpt-5_4", True), + ("gpt-5-4-mini", False), + ], +) @pytest.mark.asyncio -async def test_openai_client_keeps_temperature_for_other_models(): +async def test_openai_client_temperature_strip_per_model(model: str, expect_stripped: bool): captured: dict[str, Any] = {} def handler(request: httpx.Request) -> httpx.Response: import json as _json captured["body"] = _json.loads(request.content) - return httpx.Response(200, json=_chat_payload(model="gpt-5.4")) + return httpx.Response(200, json=_chat_payload(model=model)) transport = httpx.MockTransport(handler) client = OpenAIChatClient("https://api.openai.com", "sk-test") @@ -211,14 +236,23 @@ def handler(request: httpx.Request) -> httpx.Response: ) try: await client.chat_completions_create( - model="gpt-5.4", + model=model, temperature=0, messages=[{"role": "user", "content": "hi"}], ) finally: await client.aclose() - assert captured["body"]["temperature"] == 0 + if expect_stripped: + assert "temperature" not in captured["body"], ( + f"{model} is a reasoning-tier model that rejects temperature — " + "client must strip it before sending" + ) + else: + assert captured["body"].get("temperature") == 0, ( + f"{model} accepts temperature — client must NOT strip it " + "(stripping would lose detector determinism)" + ) def test_openai_detectors_use_core_base_classes():