canarybyte · canarybyte · May 10, 2026
diff --git a/src/relay_detector/protocols/openai/client.py b/src/relay_detector/protocols/openai/client.py
@@ -22,9 +22,25 @@
 RETRYABLE_STATUS = {429, 500, 502, 503, 504}
 MAX_BACKOFF_S = 30.0
 MAX_RETRIES = 3
-DEFAULT_TEMPERATURE_ONLY_PREFIXES = (
-    "gpt-5.5",
+# Reasoning-tier model families that REJECT sampling parameters. Sending
+# temperature (even temperature=0) returns HTTP 400 "Unsupported value:
+# 'temperature' does not support 0 with this model. Only the default (1)
+# value is supported." The safe action is to strip the field entirely.
+#
+# IMPORTANT: -mini / -nano sub-variants of these families are distinct
+# (non-reasoning) models that DO accept temperature — they must NOT be
+# stripped, otherwise consistency / model_consistency detectors lose
+# determinism and start flapping.
+#
+# Sources (May 2026):
+#   - https://community.openai.com/t/temperature-in-gpt-5-models/1337133
+#   - https://github.com/mem0ai/mem0/issues/4738 (gpt-5.4-mini accepts temp)
+#   - https://github.com/BerriAI/litellm/issues/27351 (gpt-5.1 reasoning_effort=none accepts temp)
+_TEMPERATURE_REJECTING_FAMILIES = (
+    "gpt-5.5",  # 5.5 / 5.5-pro / 5.5-2026-04-23 (no -mini/-nano variant exists yet)
+    "gpt-5.4",  # 5.4 / 5.4-pro — but NOT 5.4-mini / 5.4-nano
 )
+_TEMPERATURE_OK_SUB_VARIANTS = ("-mini", "-nano")
 
 
 def normalize_openai_base_url(base_url: str) -> str:
@@ -34,9 +50,29 @@ def normalize_openai_base_url(base_url: str) -> str:
     return normalized + "/v1"
 
 
+def _normalize_openai_model_id(model_id: str) -> str:
+    """Canonicalize the separators of an OpenAI model id.
+
+    Every `.` and `_` is rewritten to `-`, so `gpt-5.4`, `gpt-5_4` and
+    `gpt-5-4` all land in the same family bucket. This is meant to mirror
+    the canonicalization done by models_match in config.py.
+    """
+    # One pass over the string; both separators map to the same hyphen.
+    return model_id.translate(str.maketrans({".": "-", "_": "-"}))
+
+
+def _rejects_temperature(model_id: str) -> bool:
+    """Return True when `model_id` is in a family that rejects `temperature`.
+
+    The id and every entry of _TEMPERATURE_REJECTING_FAMILIES are
+    canonicalized with _normalize_openai_model_id before comparison.
+
+    A family only matches on a whole-segment boundary: the id is either the
+    family name itself or the family name followed by `-<something>`. This
+    keeps an id that merely shares the leading characters (e.g. `gpt-5.40`)
+    out of the `gpt-5.4` bucket, so its temperature is not silently dropped.
+
+    Inside a matching family, an id whose next segment is one of
+    _TEMPERATURE_OK_SUB_VARIANTS (e.g. `gpt-5.4-mini`, optionally followed
+    by a further `-<suffix>`) is reported as accepting temperature.
+    Ids that match no family return False.
+    """
+    normalized = _normalize_openai_model_id(model_id)
+    for family in _TEMPERATURE_REJECTING_FAMILIES:
+        prefix = _normalize_openai_model_id(family)
+        if not normalized.startswith(prefix):
+            continue
+        tail = normalized[len(prefix):]
+        if tail and not tail.startswith("-"):
+            # Prefix collision only (e.g. `gpt-5-40` vs `gpt-5-4`); this id is
+            # not a member of the family, so keep looking.
+            continue
+        # Sub-variant markers must also end on a segment boundary so that a
+        # longer segment such as `-minimal` is not mistaken for `-mini`.
+        accepts_temperature = any(
+            tail == suffix or tail.startswith(suffix + "-")
+            for suffix in _TEMPERATURE_OK_SUB_VARIANTS
+        )
+        return not accepts_temperature
+    return False
+
+
 def _sanitize_body(body: dict[str, Any]) -> dict[str, Any]:
     model = body.get("model")
-    if isinstance(model, str) and model.startswith(DEFAULT_TEMPERATURE_ONLY_PREFIXES):
+    if isinstance(model, str) and _rejects_temperature(model):
         body.pop("temperature", None)
     return body
 

diff --git a/tests/test_openai_phase2.py b/tests/test_openai_phase2.py
@@ -193,14 +193,39 @@ def handler(request: httpx.Request) -> httpx.Response:
     assert "temperature" not in captured["body"]
 
 
+@pytest.mark.parametrize(
+    "model,expect_stripped",
+    [
+        # Reasoning-tier models that reject temperature (HTTP 400 from OpenAI)
+        ("gpt-5.5", True),
+        ("gpt-5.5-pro", True),
+        ("gpt-5.5-2026-04-23", True),
+        ("gpt-5.4", True),
+        ("gpt-5.4-pro", True),
+        # Sub-variants of reasoning families ARE distinct (non-reasoning)
+        # models that accept temperature — must NOT be stripped
+        ("gpt-5.4-mini", False),
+        ("gpt-5.4-nano", False),
+        # Other GPT-5 lines accept temperature (5.1 with reasoning_effort=none)
+        ("gpt-5.1", False),
+        ("gpt-5.1-mini", False),
+        # Legacy / non-reasoning families — never stripped
+        ("gpt-4o", False),
+        ("gpt-4o-mini", False),
+        # Dot/hyphen/underscore canonicalization: same family bucket
+        ("gpt-5-4", True),
+        ("gpt-5_4", True),
+        ("gpt-5-4-mini", False),
+    ],
+)
 @pytest.mark.asyncio
-async def test_openai_client_keeps_temperature_for_other_models():
+async def test_openai_client_temperature_strip_per_model(model: str, expect_stripped: bool):
     captured: dict[str, Any] = {}
 
     def handler(request: httpx.Request) -> httpx.Response:
         import json as _json
         captured["body"] = _json.loads(request.content)
-        return httpx.Response(200, json=_chat_payload(model="gpt-5.4"))
+        return httpx.Response(200, json=_chat_payload(model=model))
 
     transport = httpx.MockTransport(handler)
     client = OpenAIChatClient("https://api.openai.com", "sk-test")
@@ -211,14 +236,23 @@ def handler(request: httpx.Request) -> httpx.Response:
     )
     try:
         await client.chat_completions_create(
-            model="gpt-5.4",
+            model=model,
             temperature=0,
             messages=[{"role": "user", "content": "hi"}],
         )
     finally:
         await client.aclose()
 
-    assert captured["body"]["temperature"] == 0
+    if expect_stripped:
+        assert "temperature" not in captured["body"], (
+            f"{model} is a reasoning-tier model that rejects temperature — "
+            "client must strip it before sending"
+        )
+    else:
+        assert captured["body"].get("temperature") == 0, (
+            f"{model} accepts temperature — client must NOT strip it "
+            "(stripping would lose detector determinism)"
+        )
 
 
 def test_openai_detectors_use_core_base_classes():