From 98f962dc19a0b5e83ab00b09e21bd90f264ae7ff Mon Sep 17 00:00:00 2001
From: Chenglong Wang <ryuu.j.ching@gmail.com>
Date: Thu, 14 May 2026 17:47:18 +0800
Subject: [PATCH 1/2] fix(auto-run): unwedge experiment loop when provider
 rejects `temperature`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PRJ-0002 (2026-05-14) wedged on `azure/anthropic/claude-opus-4-7`
returning `invalid_request_error: \`temperature\` is deprecated for this
model.` Every auto-run round re-hit the same parameter error until
`_AUTO_MAX_ROUNDS` (20) was exhausted, surfacing the error to the user
instead of any research result.

Three independent bugs collided:

1. `temperature` was unconditionally attached to outbound requests for
   models that no longer accept it. Centralise the rule in a new
   `providers.model_compat` registry (currently lists `claude-opus-4-7`)
   and gate temperature emission on it in the Azure, LiteLLM, and
   Anthropic providers. Azure's existing `_supports_temperature` rule
   for `gpt-5`/`o*` deployments is preserved on top.

2. `RoutedProviderManager.chat` blindly walked the fallback chain when a
   provider RAISED rather than returned an error response, so a
   permanent 4xx burned every remaining candidate. Apply the same
   `_should_retry_with_fallback` classification used on the response
   path; non-retryable exceptions now short-circuit immediately.

3. `_evaluate_continuation` only stopped auto mode on failure responses
   when `strictHeuristics` was on. LLM-provider errors are NOT
   experiment outcomes — they mean the model never produced a turn, so
   the next round will hit the same error. Add an unconditional
   `_looks_like_llm_provider_error` check that halts auto mode with
   `stop_reason="llm provider error"` regardless of policy.

Tests cover the model_compat blocklist under every provider prefix,
the Azure body builder dropping temperature for `claude-opus-4-7`,
non-retryable raised exceptions not burning the fallback chain, and
auto-run halting on the exact error text observed in PRJ-0002.
---
 mira_engine/agent/research_loop.py            | 47 +++++++++
 mira_engine/agent/routing.py                  | 16 ++-
 mira_engine/providers/anthropic_provider.py   | 14 ++-
 .../providers/azure_openai_provider.py        | 14 ++-
 mira_engine/providers/litellm_provider.py     |  9 ++
 mira_engine/providers/model_compat.py         | 44 +++++++++
 tests/providers/test_azure_openai_provider.py | 32 ++++++
 tests/providers/test_model_compat.py          | 39 ++++++++
 tests/test_model_routing.py                   | 98 +++++++++++++++++++
 tests/test_research_loop_core.py              | 51 ++++++++++
 10 files changed, 358 insertions(+), 6 deletions(-)
 create mode 100644 mira_engine/providers/model_compat.py
 create mode 100644 tests/providers/test_model_compat.py

diff --git a/mira_engine/agent/research_loop.py b/mira_engine/agent/research_loop.py
index 8829a7b..f3a3705 100644
--- a/mira_engine/agent/research_loop.py
+++ b/mira_engine/agent/research_loop.py
@@ -321,6 +321,45 @@ def _looks_like_failure_response(cls, text: str | None) -> bool:
         )
         return any(k in tail for k in soft_signals)
 
+    @classmethod
+    def _looks_like_llm_provider_error(cls, text: str | None) -> bool:
+        """Detect when ``final_content`` is a system-level LLM call failure.
+
+        These are surfaced by provider error handlers (``_handle_error`` in
+        ``anthropic_provider`` / ``azure_openai_provider`` / etc., and the
+        chain-failure path in ``RoutedProviderManager.chat``) and must always
+        halt auto mode — they are NOT experiment outcomes. Without this
+        guard a parameter-level 4xx (e.g. Azure dropping ``temperature``)
+        gets retried every round until ``_AUTO_MAX_ROUNDS`` (20) is
+        exhausted, which is what we saw in PRJ-0002 on 2026-05-14.
+
+        Unlike :meth:`_looks_like_failure_response`, this check fires
+        regardless of ``strictHeuristics`` because the agent has not even
+        produced a turn — there is nothing to iterate on.
+        """
+        if not text:
+            return False
+        lowered = text.lower()
+        markers = (
+            # Provider wrappers (see anthropic_provider._handle_error,
+            # azure_openai_provider._handle_error, openai_compat_provider,
+            # litellm_provider.chat, openai_codex_provider).
+            "error calling llm",
+            "error calling azure openai",
+            "error calling codex",
+            "error calling github copilot",
+            # Underlying SDK / gateway error types.
+            "litellm.badrequesterror",
+            "azure_aiexception",
+            "invalid_request_error",
+            "bad_request_error",
+            # RoutedProviderManager terminal message.
+            "all candidate models failed for this turn",
+            # base_loop fallback when an error response has no content.
+            "sorry, i encountered an error calling the ai model",
+        )
+        return any(marker in lowered for marker in markers)
+
     # ------------------------------------------------------------------
     # task_plan loaders / inspectors
     # ------------------------------------------------------------------
@@ -874,6 +913,14 @@ def _evaluate_continuation(
         """
         if run_mode != "auto":
             return False, None
+        # LLM provider errors must halt the loop unconditionally — the agent
+        # never even produced a turn, so the next round will hit the exact
+        # same failure (parameter rejected, auth invalid, gateway down, ...).
+        # Without this guard a single bad request burns through all 20
+        # auto-run rounds before surfacing the error to the user.
+        if self._looks_like_llm_provider_error(final_content):
+            logger.warning("Auto mode halting: LLM provider error in final response")
+            return False, "llm provider error"
         if not self._guard_task_plan_structure(project_dir, profile=agent_profile):
             return False, "task_plan guardrail blocking"
         if auto_round >= self._AUTO_MAX_ROUNDS:
diff --git a/mira_engine/agent/routing.py b/mira_engine/agent/routing.py
index 7c13ea0..4fb99e8 100644
--- a/mira_engine/agent/routing.py
+++ b/mira_engine/agent/routing.py
@@ -281,14 +281,26 @@ async def chat(
             except Exception as exc:
                 last_error = exc
                 self._mark_model_failed(model)
-                if index < len(candidates) - 1:
+                # Mirror the response-path classification so a permanent 4xx
+                # (auth, invalid_request_error, bad request, ...) does not
+                # blindly burn the rest of the fallback chain — those errors
+                # will fail identically on every other candidate.
+                exc_text = str(exc) or exc.__class__.__name__
+                is_retryable = self._should_retry_with_fallback(exc_text)
+                if is_retryable and index < len(candidates) - 1:
                     logger.warning(
-                        "Model '{}' raised '{}'; trying fallback model '{}'",
+                        "Model '{}' raised retryable error '{}'; trying fallback model '{}'",
                         model,
                         exc,
                         candidates[index + 1],
                     )
                     continue
+                if not is_retryable:
+                    logger.warning(
+                        "Model '{}' raised non-retryable error '{}'; skipping fallback candidates",
+                        model,
+                        exc,
+                    )
                 raise
 
             if response.finish_reason != "error" or not self._should_retry_with_fallback(response.content):
diff --git a/mira_engine/providers/anthropic_provider.py b/mira_engine/providers/anthropic_provider.py
index 07cb001..863a442 100644
--- a/mira_engine/providers/anthropic_provider.py
+++ b/mira_engine/providers/anthropic_provider.py
@@ -13,6 +13,7 @@
 import json_repair
 
 from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+from mira_engine.providers.model_compat import model_supports_temperature
 
 _ALNUM = string.ascii_letters + string.digits
 
@@ -380,19 +381,26 @@ def _build_kwargs(
         if system:
             kwargs["system"] = system
 
+        # Some models (e.g. claude-opus-4-7 fronted by Azure AI) reject
+        # `temperature` outright with `invalid_request_error`. Resolve this
+        # once via the shared registry so every code path agrees.
+        temperature_allowed = model_supports_temperature(model) and model_supports_temperature(model_name)
+
         if reasoning_effort == "adaptive":
             # Adaptive thinking: model decides when and how much to think
             # Supported on claude-sonnet-4-6 and claude-opus-4-6.
             # Also auto-enables interleaved thinking between tool calls.
             kwargs["thinking"] = {"type": "adaptive"}
-            kwargs["temperature"] = 1.0
+            if temperature_allowed:
+                kwargs["temperature"] = 1.0
         elif thinking_enabled:
             budget_map = {"low": 1024, "medium": 4096, "high": max(8192, max_tokens)}
             budget = budget_map.get(reasoning_effort.lower(), 4096)
             kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
             kwargs["max_tokens"] = max(max_tokens, budget + 4096)
-            kwargs["temperature"] = 1.0
-        else:
+            if temperature_allowed:
+                kwargs["temperature"] = 1.0
+        elif temperature_allowed:
             kwargs["temperature"] = temperature
 
         if anthropic_tools:
diff --git a/mira_engine/providers/azure_openai_provider.py b/mira_engine/providers/azure_openai_provider.py
index 5c52fa8..7b15ac6 100644
--- a/mira_engine/providers/azure_openai_provider.py
+++ b/mira_engine/providers/azure_openai_provider.py
@@ -9,6 +9,7 @@
 from openai import AsyncOpenAI
 
 from mira_engine.providers.base import LLMProvider, LLMResponse
+from mira_engine.providers.model_compat import model_supports_temperature
 from mira_engine.providers.openai_responses import (
     consume_sdk_stream,
     convert_messages,
@@ -55,9 +56,20 @@ def _supports_temperature(
         deployment_name: str,
         reasoning_effort: str | None = None,
     ) -> bool:
-        """Return True when temperature is likely supported for this deployment."""
+        """Return True when temperature is likely supported for this deployment.
+
+        Combines two rule sets:
+          - Azure-hosted OpenAI reasoning deployments (``gpt-5``, ``o1``,
+            ``o3``, ``o4``) and any call passing ``reasoning_effort`` drop
+            ``temperature``.
+          - Models flagged in :mod:`providers.model_compat` (e.g.
+            Azure-hosted ``claude-opus-4-7``) drop ``temperature`` regardless
+            of deployment prefix.
+        """
         if reasoning_effort:
             return False
+        if not model_supports_temperature(deployment_name):
+            return False
         name = deployment_name.lower()
         return not any(token in name for token in ("gpt-5", "o1", "o3", "o4"))
 
diff --git a/mira_engine/providers/litellm_provider.py b/mira_engine/providers/litellm_provider.py
index 9c5d7bc..08f1368 100644
--- a/mira_engine/providers/litellm_provider.py
+++ b/mira_engine/providers/litellm_provider.py
@@ -12,6 +12,7 @@
 from loguru import logger
 
 from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+from mira_engine.providers.model_compat import model_supports_temperature
 from mira_engine.providers.registry import find_by_model, find_gateway
 
 # Standard chat-completion message keys.
@@ -247,6 +248,14 @@ async def chat(
             "temperature": temperature,
         }
 
+        # Strip `temperature` for models that reject it (e.g.
+        # ``azure/anthropic/claude-opus-4-7`` returns
+        # ``invalid_request_error: \`temperature\` is deprecated for this model.``).
+        # Done before _apply_model_overrides so a registry override can still
+        # re-add it intentionally if some future provider needs that.
+        if not model_supports_temperature(original_model) or not model_supports_temperature(model):
+            kwargs.pop("temperature", None)
+
         # Apply model-specific overrides (e.g. kimi-k2.5 temperature)
         self._apply_model_overrides(model, kwargs)
 
diff --git a/mira_engine/providers/model_compat.py b/mira_engine/providers/model_compat.py
new file mode 100644
index 0000000..6ddb1a3
--- /dev/null
+++ b/mira_engine/providers/model_compat.py
@@ -0,0 +1,44 @@
+r"""Model-level compatibility flags shared across providers.
+
+Some newer LLMs (Anthropic Claude Opus 4-7, several OpenAI reasoning
+deployments, etc.) reject the ``temperature`` parameter outright. The
+property is **model-scoped**, not provider-scoped: the same model is
+exposed by Anthropic directly, by Azure AI's Anthropic deployment, by
+AiHubMix/OpenRouter gateways, etc., and every path must drop the
+parameter or the request 400s.
+
+Keeping the rule in one place avoids the failure mode we observed in
+production where ``azure/anthropic/claude-opus-4-7`` repeatedly errored
+with ``\`temperature\` is deprecated for this model.`` while every
+provider builder still attached ``temperature``.
+
+To extend, add a substring token to ``TEMPERATURE_UNSUPPORTED_MODEL_TOKENS``.
+"""
+
+from __future__ import annotations
+
+# Substrings (case-insensitive) that identify models which reject
+# `temperature`. Match is intentionally loose so it catches the model
+# under every provider/gateway prefix (``anthropic/...``,
+# ``azure/anthropic/...``, ``openrouter/anthropic/...``, etc.).
+TEMPERATURE_UNSUPPORTED_MODEL_TOKENS: frozenset[str] = frozenset(
+    {
+        # Claude Opus 4-7 on Azure AI rejects `temperature` with
+        # `invalid_request_error: \`temperature\` is deprecated for this model.`
+        # The same model on the native Anthropic API still accepts it today,
+        # but stripping it everywhere is safe (Anthropic defaults to 1.0).
+        "claude-opus-4-7",
+    }
+)
+
+
+def model_supports_temperature(model: str | None) -> bool:
+    """Return True when the model is expected to accept ``temperature``.
+
+    Empty / unknown model strings default to True so we don't accidentally
+    suppress the parameter for ordinary models.
+    """
+    if not model:
+        return True
+    lowered = model.lower()
+    return not any(token in lowered for token in TEMPERATURE_UNSUPPORTED_MODEL_TOKENS)
diff --git a/tests/providers/test_azure_openai_provider.py b/tests/providers/test_azure_openai_provider.py
index 2af6b0c..615466e 100644
--- a/tests/providers/test_azure_openai_provider.py
+++ b/tests/providers/test_azure_openai_provider.py
@@ -78,6 +78,38 @@ def test_supports_temperature_with_reasoning_effort():
     assert AzureOpenAIProvider._supports_temperature("gpt-4o", reasoning_effort="medium") is False
 
 
+def test_supports_temperature_blocks_claude_opus_4_7():
+    """Bug 1 regression: Azure-hosted ``claude-opus-4-7`` deployments reject
+    ``temperature`` outright. The blocklist (via providers.model_compat) must
+    catch the deployment under every prefix variant.
+    """
+    assert AzureOpenAIProvider._supports_temperature("claude-opus-4-7") is False
+    assert AzureOpenAIProvider._supports_temperature("azure/anthropic/claude-opus-4-7") is False
+    assert AzureOpenAIProvider._supports_temperature("anthropic/claude-opus-4-7") is False
+
+
+def test_build_body_drops_temperature_for_claude_opus_4_7():
+    """The Responses API body must NOT carry ``temperature`` when the model
+    is on the blocklist — otherwise Azure returns ``invalid_request_error``
+    and the agent's auto-run loop wedges (see PRJ-0002 incident 2026-05-14).
+    """
+    provider = AzureOpenAIProvider(
+        api_key="k",
+        api_base="https://r.openai.azure.com",
+        default_model="claude-opus-4-7",
+    )
+    body = provider._build_body(
+        [{"role": "user", "content": "hi"}],
+        None,
+        "azure/anthropic/claude-opus-4-7",
+        4096,
+        0.7,
+        None,
+        None,
+    )
+    assert "temperature" not in body
+
+
 # ---------------------------------------------------------------------------
 # _build_body — Responses API body construction
 # ---------------------------------------------------------------------------
diff --git a/tests/providers/test_model_compat.py b/tests/providers/test_model_compat.py
new file mode 100644
index 0000000..37c8e9b
--- /dev/null
+++ b/tests/providers/test_model_compat.py
@@ -0,0 +1,39 @@
+"""Tests for the shared model-compatibility helper."""
+
+from __future__ import annotations
+
+from mira_engine.providers.model_compat import (
+    TEMPERATURE_UNSUPPORTED_MODEL_TOKENS,
+    model_supports_temperature,
+)
+
+
+def test_ordinary_models_support_temperature():
+    assert model_supports_temperature("anthropic/claude-sonnet-4-5") is True
+    assert model_supports_temperature("openai/gpt-4o") is True
+    assert model_supports_temperature("gpt-4.1-mini") is True
+
+
+def test_empty_or_none_model_defaults_to_supported():
+    # We don't want to silently strip temperature for unknown models —
+    # only an explicit token match disables it.
+    assert model_supports_temperature(None) is True
+    assert model_supports_temperature("") is True
+
+
+def test_claude_opus_4_7_is_blocked_under_every_provider_prefix():
+    # Bug 1: in production we observed `azure/anthropic/claude-opus-4-7`
+    # rejecting `temperature` with
+    #   invalid_request_error: `temperature` is deprecated for this model.
+    # The token rule must catch the model under every prefix variant.
+    assert model_supports_temperature("claude-opus-4-7") is False
+    assert model_supports_temperature("anthropic/claude-opus-4-7") is False
+    assert model_supports_temperature("azure/anthropic/claude-opus-4-7") is False
+    assert model_supports_temperature("openrouter/anthropic/claude-opus-4-7") is False
+    assert model_supports_temperature("Azure/Anthropic/Claude-Opus-4-7") is False
+
+
+def test_blocklist_is_a_frozenset():
+    # Guards against accidental in-place mutation from another module.
+    assert isinstance(TEMPERATURE_UNSUPPORTED_MODEL_TOKENS, frozenset)
+    assert "claude-opus-4-7" in TEMPERATURE_UNSUPPORTED_MODEL_TOKENS
diff --git a/tests/test_model_routing.py b/tests/test_model_routing.py
index 314747e..c4e6e67 100644
--- a/tests/test_model_routing.py
+++ b/tests/test_model_routing.py
@@ -282,6 +282,104 @@ async def test_routing_prefers_recently_successful_routing_model() -> None:
     assert broken.calls == 1
 
 
+class _RaisingProvider(LLMProvider):
+    """Provider that always raises the same exception."""
+
+    def __init__(self, exc: Exception):
+        super().__init__()
+        self.exc = exc
+        self.calls = 0
+
+    async def chat(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
+        model: str | None = None,
+        max_tokens: int = 4096,
+        temperature: float = 0.7,
+        reasoning_effort: str | None = None,
+    ) -> LLMResponse:
+        self.calls += 1
+        raise self.exc
+
+    def get_default_model(self) -> str:
+        return "anthropic/claude-opus-4-5"
+
+
+async def test_chat_falls_back_on_retryable_raised_exception() -> None:
+    """Existing behaviour preserved: a raised retryable exception should
+    still walk through to the next fallback candidate."""
+    raising = _RaisingProvider(TimeoutError("Request timed out"))
+    healthy = _FakeProvider()
+    providers = {
+        "openai/gpt-4.1-mini": raising,
+        "openai/gpt-4.1-nano": healthy,
+    }
+    manager = RoutedProviderManager(
+        default_provider=_FakeProvider(),
+        default_model="anthropic/claude-opus-4-5",
+        router=None,
+        provider_factory=lambda model: providers[model],
+    )
+
+    response, resolved_route = await manager.chat(
+        route=RoutedModel(
+            tier="small",
+            model="openai/gpt-4.1-mini",
+            candidates=("openai/gpt-4.1-mini", "openai/gpt-4.1-nano"),
+            source="test",
+        ),
+        messages=[{"role": "user", "content": "hello"}],
+    )
+
+    assert response.content == "ok"
+    assert resolved_route.model == "openai/gpt-4.1-nano"
+    assert raising.calls == 1
+
+
+async def test_chat_does_not_fallback_on_non_retryable_raised_exception() -> None:
+    """Bug 2 regression: when a provider RAISES (rather than returns an error
+    response) with a permanent 4xx like ``invalid_request_error``, the manager
+    must NOT silently burn the remaining fallback chain — the next candidate
+    will fail identically. The exception is re-raised after marking the
+    model failed.
+    """
+    permanent = _RaisingProvider(
+        RuntimeError("400 invalid_request_error: `temperature` is deprecated for this model.")
+    )
+    other = _RaisingProvider(RuntimeError("should not be called"))
+    providers = {
+        "openai/gpt-4.1-mini": permanent,
+        "openai/gpt-4.1-nano": other,
+    }
+    manager = RoutedProviderManager(
+        default_provider=_FakeProvider(),
+        default_model="anthropic/claude-opus-4-5",
+        router=None,
+        provider_factory=lambda model: providers[model],
+    )
+
+    raised: Exception | None = None
+    try:
+        await manager.chat(
+            route=RoutedModel(
+                tier="small",
+                model="openai/gpt-4.1-mini",
+                candidates=("openai/gpt-4.1-mini", "openai/gpt-4.1-nano"),
+                source="test",
+            ),
+            messages=[{"role": "user", "content": "hello"}],
+        )
+    except Exception as exc:
+        raised = exc
+
+    assert raised is not None
+    assert "invalid_request_error" in str(raised)
+    assert permanent.calls == 1
+    # Fallback candidate must NOT have been invoked.
+    assert other.calls == 0
+
+
 async def test_chat_reports_error_when_all_candidate_models_fail() -> None:
     manager = RoutedProviderManager(
         default_provider=_FakeProvider(),
diff --git a/tests/test_research_loop_core.py b/tests/test_research_loop_core.py
index 535fa32..fd57a9c 100644
--- a/tests/test_research_loop_core.py
+++ b/tests/test_research_loop_core.py
@@ -245,6 +245,57 @@ def test_auto_run_decision_helpers(tmp_path: Path) -> None:
         auto_round=0,
     ) is False
 
+    # Bug 3 regression: when the LLM call itself fails (parameter rejected,
+    # gateway down, all candidates exhausted) the final_content is a system
+    # error and auto-run must halt unconditionally — even when there is
+    # pending work in the plan and strictHeuristics is the default. Without
+    # this guard, a single bad request burns all 20 auto rounds repeatedly
+    # hitting the same error (see PRJ-0002 incident 2026-05-14).
+    assert ResearchAgentLoop._looks_like_llm_provider_error(
+        "Error calling LLM: litellm.BadRequestError: Azure_aiException - "
+        "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\","
+        "\"message\":\"`temperature` is deprecated for this model.\"}}"
+    ) is True
+    assert ResearchAgentLoop._looks_like_llm_provider_error(
+        "Error calling Azure OpenAI: Connection failed"
+    ) is True
+    assert ResearchAgentLoop._looks_like_llm_provider_error(
+        "All candidate models failed for this turn. Last error from "
+        "'azure/anthropic/claude-opus-4-7': Error calling LLM: Request timed out."
+    ) is True
+    # Ordinary experiment outputs that happen to contain the word "error"
+    # must NOT trigger this guard.
+    assert ResearchAgentLoop._looks_like_llm_provider_error(
+        "实验完成，error rate 降至 0.03。继续下一组消融。"
+    ) is False
+    assert ResearchAgentLoop._looks_like_llm_provider_error(None) is False
+
+    decision, reason = loop._evaluate_continuation(
+        run_mode="auto",
+        project_dir=str(project),
+        final_content=(
+            "Error: {'message': 'litellm.BadRequestError: Azure_aiException - "
+            "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\","
+            "\"message\":\"`temperature` is deprecated for this model.\"}}'}"
+        ),
+        auto_round=1,
+    )
+    assert decision is False
+    assert reason == "llm provider error"
+
+    # Even with strictHeuristics disabled (the production config from PRJ-0002
+    # had no automation_policy, which defaults to relaxed heuristics) the
+    # LLM provider error must still halt.
+    decision, reason = loop._evaluate_continuation(
+        run_mode="auto",
+        project_dir=str(project),
+        final_content="All candidate models failed for this turn. Last error from 'gpt-5.5': Error calling LLM: Request timed out.",
+        auto_round=1,
+        automation_policy=relaxed_policy,
+    )
+    assert decision is False
+    assert reason == "llm provider error"
+
     exhausted_policy = loop._parse_automation_policy(
         {
             "logic": "AND",

From 2dd914085bc691a0e764180d50cddce84cb3c389 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <ryuu.j.ching@gmail.com>
Date: Thu, 14 May 2026 21:33:16 +0800
Subject: [PATCH 2/2] fix(providers): drop temperature for claude-opus-4-7 on
 OpenAICompatProvider too

Second occurrence at 2026-05-14 21:09: the temperature error reappeared
even after the fix in the parent commit. Root cause: `OpenAICompatProvider`
(used by `custom` provider configs and by `GitHubCopilotProvider` via
inheritance) keeps its own `_supports_temperature` rule, which only
blocked GPT-5 / o1 / o3 / o4 deployments. When a user's OpenAI-compatible
endpoint proxies to Azure-hosted `claude-opus-4-7`, this path still
attached `temperature` and Azure rejected the request (HTTP 400) with
`invalid_request_error: \`temperature\` is deprecated for this model.`

Have `_supports_temperature` also consult the shared
`providers.model_compat` registry. Same pattern as Azure / LiteLLM /
Anthropic providers from the parent commit. The error-format trail
(`Error: {'message':...}`) comes from `_handle_error` in
`openai_compat_provider.py:811`, which confirms this code path is the
one the user's config hits.

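Adds three regression tests:
- `_supports_temperature` returns False for `claude-opus-4-7` under
  every provider prefix.
- `_build_kwargs` omits `temperature` from the outbound request for
  `azure/anthropic/claude-opus-4-7`.
- `_build_responses_body` omits `temperature` from the outbound request
  body for `azure/anthropic/claude-opus-4-7`.

Also drops the PRJ-0002 incident references from the docstrings and
comments added by the parent commit.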
---
 mira_engine/agent/research_loop.py            |  2 +-
 .../providers/openai_compat_provider.py       | 11 +++-
 tests/providers/test_azure_openai_provider.py |  2 +-
 tests/providers/test_litellm_kwargs.py        | 53 +++++++++++++++++++
 tests/test_research_loop_core.py              |  7 ++-
 5 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/mira_engine/agent/research_loop.py b/mira_engine/agent/research_loop.py
index f3a3705..28cbd20 100644
--- a/mira_engine/agent/research_loop.py
+++ b/mira_engine/agent/research_loop.py
@@ -331,7 +331,7 @@ def _looks_like_llm_provider_error(cls, text: str | None) -> bool:
         halt auto mode — they are NOT experiment outcomes. Without this
         guard a parameter-level 4xx (e.g. Azure dropping ``temperature``)
         gets retried every round until ``_AUTO_MAX_ROUNDS`` (20) is
-        exhausted, which is what we saw in PRJ-0002 on 2026-05-14.
+        exhausted.
 
         Unlike :meth:`_looks_like_failure_response`, this check fires
         regardless of ``strictHeuristics`` because the agent has not even
diff --git a/mira_engine/providers/openai_compat_provider.py b/mira_engine/providers/openai_compat_provider.py
index 4b4a051..54a7026 100644
--- a/mira_engine/providers/openai_compat_provider.py
+++ b/mira_engine/providers/openai_compat_provider.py
@@ -26,6 +26,7 @@
     from openai import AsyncOpenAI
 
 from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
+from mira_engine.providers.model_compat import model_supports_temperature
 from mira_engine.providers.openai_responses import (
     consume_sdk_stream,
     convert_messages,
@@ -263,11 +264,17 @@ def _supports_temperature(
     ) -> bool:
         """Return True when the model accepts a temperature parameter.
 
-        GPT-5 family and reasoning models (o1/o3/o4) reject temperature
-        when reasoning_effort is set to anything other than ``"none"``.
+        Combines two rule sets:
+          - GPT-5 / o1 / o3 / o4 deployments (and any non-``"none"``
+            ``reasoning_effort``) reject temperature.
+          - Models flagged in :mod:`providers.model_compat` (e.g.
+            Azure-proxied ``claude-opus-4-7``) reject temperature
+            regardless of the OpenAI-compatible front-end.
         """
         if reasoning_effort and reasoning_effort.lower() != "none":
             return False
+        if not model_supports_temperature(model_name):
+            return False
         name = model_name.lower()
         return not any(token in name for token in ("gpt-5", "o1", "o3", "o4"))
 
diff --git a/tests/providers/test_azure_openai_provider.py b/tests/providers/test_azure_openai_provider.py
index 615466e..a0fe144 100644
--- a/tests/providers/test_azure_openai_provider.py
+++ b/tests/providers/test_azure_openai_provider.py
@@ -91,7 +91,7 @@ def test_supports_temperature_blocks_claude_opus_4_7():
 def test_build_body_drops_temperature_for_claude_opus_4_7():
     """The Responses API body must NOT carry ``temperature`` when the model
     is on the blocklist — otherwise Azure returns ``invalid_request_error``
-    and the agent's auto-run loop wedges (see PRJ-0002 incident 2026-05-14).
+    and the agent's auto-run loop wedges.
     """
     provider = AzureOpenAIProvider(
         api_key="k",
diff --git a/tests/providers/test_litellm_kwargs.py b/tests/providers/test_litellm_kwargs.py
index 2df8dc5..49041a5 100644
--- a/tests/providers/test_litellm_kwargs.py
+++ b/tests/providers/test_litellm_kwargs.py
@@ -502,6 +502,59 @@ def test_openai_compat_supports_temperature_matches_reasoning_model_rules() -> N
     assert OpenAICompatProvider._supports_temperature("gpt-4o", reasoning_effort="medium") is False
 
 
+def test_openai_compat_supports_temperature_blocks_claude_opus_4_7() -> None:
+    """Bug 1 regression: OpenAI-compatible custom endpoints that proxy to
+    Azure-hosted ``claude-opus-4-7`` must drop ``temperature`` — otherwise
+    Azure returns ``invalid_request_error: \\`temperature\\` is deprecated for
+    this model.``.
+    """
+    assert OpenAICompatProvider._supports_temperature("claude-opus-4-7") is False
+    assert OpenAICompatProvider._supports_temperature("azure/anthropic/claude-opus-4-7") is False
+    assert OpenAICompatProvider._supports_temperature("anthropic/claude-opus-4-7") is False
+
+
+def test_openai_compat_build_kwargs_drops_temperature_for_claude_opus_4_7() -> None:
+    spec = find_by_name("custom")
+    with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
+        provider = OpenAICompatProvider(
+            api_key="any",
+            default_model="azure/anthropic/claude-opus-4-7",
+            spec=spec,
+        )
+
+    kwargs = provider._build_kwargs(
+        messages=[{"role": "user", "content": "hi"}],
+        tools=None,
+        model="azure/anthropic/claude-opus-4-7",
+        max_tokens=512,
+        temperature=0.7,
+        reasoning_effort=None,
+        tool_choice=None,
+    )
+    assert "temperature" not in kwargs
+
+
+def test_openai_compat_build_responses_body_drops_temperature_for_claude_opus_4_7() -> None:
+    spec = find_by_name("custom")
+    with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
+        provider = OpenAICompatProvider(
+            api_key="any",
+            default_model="azure/anthropic/claude-opus-4-7",
+            spec=spec,
+        )
+
+    body = provider._build_responses_body(
+        messages=[{"role": "user", "content": "hi"}],
+        tools=None,
+        model="azure/anthropic/claude-opus-4-7",
+        max_tokens=512,
+        temperature=0.7,
+        reasoning_effort=None,
+        tool_choice=None,
+    )
+    assert "temperature" not in body
+
+
 def test_openai_compat_build_kwargs_uses_gpt5_safe_parameters() -> None:
     spec = find_by_name("openai")
     with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
diff --git a/tests/test_research_loop_core.py b/tests/test_research_loop_core.py
index fd57a9c..4b5b7a5 100644
--- a/tests/test_research_loop_core.py
+++ b/tests/test_research_loop_core.py
@@ -245,12 +245,12 @@ def test_auto_run_decision_helpers(tmp_path: Path) -> None:
         auto_round=0,
     ) is False
 
-    # Bug 3 regression: when the LLM call itself fails (parameter rejected,
+    # when the LLM call itself fails (parameter rejected,
     # gateway down, all candidates exhausted) the final_content is a system
     # error and auto-run must halt unconditionally — even when there is
     # pending work in the plan and strictHeuristics is the default. Without
     # this guard, a single bad request burns all 20 auto rounds repeatedly
-    # hitting the same error (see PRJ-0002 incident 2026-05-14).
+    # hitting the same error.
     assert ResearchAgentLoop._looks_like_llm_provider_error(
         "Error calling LLM: litellm.BadRequestError: Azure_aiException - "
         "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\","
@@ -283,8 +283,7 @@ def test_auto_run_decision_helpers(tmp_path: Path) -> None:
     assert decision is False
     assert reason == "llm provider error"
 
-    # Even with strictHeuristics disabled (the production config from PRJ-0002
-    # had no automation_policy, which defaults to relaxed heuristics) the
+    # Even with strictHeuristics disabled the
     # LLM provider error must still halt.
     decision, reason = loop._evaluate_continuation(
         run_mode="auto",