Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions mira_engine/agent/research_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,45 @@ def _looks_like_failure_response(cls, text: str | None) -> bool:
)
return any(k in tail for k in soft_signals)

@classmethod
def _looks_like_llm_provider_error(cls, text: str | None) -> bool:
    """Detect when ``final_content`` is a system-level LLM call failure.

    These are surfaced by provider error handlers (``_handle_error`` in
    ``anthropic_provider`` / ``azure_openai_provider`` / etc., and the
    chain-failure path in ``RoutedProviderManager.chat``) and must always
    halt auto mode — they are NOT experiment outcomes. Without this
    guard a parameter-level 4xx (e.g. Azure dropping ``temperature``)
    gets retried every round until ``_AUTO_MAX_ROUNDS`` (20) is
    exhausted.

    Unlike :meth:`_looks_like_failure_response`, this check fires
    regardless of ``strictHeuristics`` because the agent has not even
    produced a turn — there is nothing to iterate on.

    Args:
        text: Final response content to inspect; ``None`` or an empty
            string is never treated as a provider error.

    Returns:
        ``True`` when any known provider-error marker occurs in ``text``
        (compared case-insensitively as a substring), otherwise ``False``.
    """
    if not text:
        return False
    # Markers are written in lower case; lower the haystack once so the
    # substring comparison is case-insensitive.
    lowered = text.lower()
    markers = (
        # Provider wrappers (see anthropic_provider._handle_error,
        # azure_openai_provider._handle_error, openai_compat_provider,
        # litellm_provider.chat, openai_codex_provider).
        "error calling llm",
        "error calling azure openai",
        "error calling codex",
        "error calling github copilot",
        # Underlying SDK / gateway error types.
        "litellm.badrequesterror",
        "azure_aiexception",
        "invalid_request_error",
        "bad_request_error",
        # RoutedProviderManager terminal message.
        "all candidate models failed for this turn",
        # base_loop fallback when an error response has no content.
        "sorry, i encountered an error calling the ai model",
    )
    return any(marker in lowered for marker in markers)

# ------------------------------------------------------------------
# task_plan loaders / inspectors
# ------------------------------------------------------------------
Expand Down Expand Up @@ -874,6 +913,14 @@ def _evaluate_continuation(
"""
if run_mode != "auto":
return False, None
# LLM provider errors must halt the loop unconditionally — the agent
# never even produced a turn, so the next round will hit the exact
# same failure (parameter rejected, auth invalid, gateway down, ...).
# Without this guard a single bad request burns through all 20
# auto-run rounds before surfacing the error to the user.
if self._looks_like_llm_provider_error(final_content):
logger.warning("Auto mode halting: LLM provider error in final response")
return False, "llm provider error"
if not self._guard_task_plan_structure(project_dir, profile=agent_profile):
return False, "task_plan guardrail blocking"
if auto_round >= self._AUTO_MAX_ROUNDS:
Expand Down
16 changes: 14 additions & 2 deletions mira_engine/agent/routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,14 +281,26 @@ async def chat(
except Exception as exc:
last_error = exc
self._mark_model_failed(model)
if index < len(candidates) - 1:
# Mirror the response-path classification so a permanent 4xx
# (auth, invalid_request_error, bad request, ...) does not
# blindly burn the rest of the fallback chain — those errors
# will fail identically on every other candidate.
exc_text = str(exc) or exc.__class__.__name__
is_retryable = self._should_retry_with_fallback(exc_text)
if is_retryable and index < len(candidates) - 1:
logger.warning(
"Model '{}' raised '{}'; trying fallback model '{}'",
"Model '{}' raised retryable error '{}'; trying fallback model '{}'",
model,
exc,
candidates[index + 1],
)
continue
if not is_retryable:
logger.warning(
"Model '{}' raised non-retryable error '{}'; skipping fallback candidates",
model,
exc,
)
raise

if response.finish_reason != "error" or not self._should_retry_with_fallback(response.content):
Expand Down
14 changes: 11 additions & 3 deletions mira_engine/providers/anthropic_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json_repair

from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from mira_engine.providers.model_compat import model_supports_temperature

_ALNUM = string.ascii_letters + string.digits

Expand Down Expand Up @@ -380,19 +381,26 @@ def _build_kwargs(
if system:
kwargs["system"] = system

# Some models (e.g. claude-opus-4-7 fronted by Azure AI) reject
# `temperature` outright with `invalid_request_error`. Resolve this
# once via the shared registry so every code path agrees.
temperature_allowed = model_supports_temperature(model) and model_supports_temperature(model_name)

if reasoning_effort == "adaptive":
# Adaptive thinking: model decides when and how much to think
# Supported on claude-sonnet-4-6 and claude-opus-4-6.
# Also auto-enables interleaved thinking between tool calls.
kwargs["thinking"] = {"type": "adaptive"}
kwargs["temperature"] = 1.0
if temperature_allowed:
kwargs["temperature"] = 1.0
elif thinking_enabled:
budget_map = {"low": 1024, "medium": 4096, "high": max(8192, max_tokens)}
budget = budget_map.get(reasoning_effort.lower(), 4096)
kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
kwargs["max_tokens"] = max(max_tokens, budget + 4096)
kwargs["temperature"] = 1.0
else:
if temperature_allowed:
kwargs["temperature"] = 1.0
elif temperature_allowed:
kwargs["temperature"] = temperature

if anthropic_tools:
Expand Down
14 changes: 13 additions & 1 deletion mira_engine/providers/azure_openai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from openai import AsyncOpenAI

from mira_engine.providers.base import LLMProvider, LLMResponse
from mira_engine.providers.model_compat import model_supports_temperature
from mira_engine.providers.openai_responses import (
consume_sdk_stream,
convert_messages,
Expand Down Expand Up @@ -55,9 +56,20 @@ def _supports_temperature(
deployment_name: str,
reasoning_effort: str | None = None,
) -> bool:
"""Return True when temperature is likely supported for this deployment."""
"""Return True when temperature is likely supported for this deployment.

Combines two rule sets:
- Azure-hosted OpenAI reasoning deployments (``gpt-5``, ``o1``,
``o3``, ``o4``) and any call passing ``reasoning_effort`` drop
``temperature``.
- Models flagged in :mod:`providers.model_compat` (e.g.
Azure-hosted ``claude-opus-4-7``) drop ``temperature`` regardless
of deployment prefix.
"""
if reasoning_effort:
return False
if not model_supports_temperature(deployment_name):
return False
name = deployment_name.lower()
return not any(token in name for token in ("gpt-5", "o1", "o3", "o4"))

Expand Down
9 changes: 9 additions & 0 deletions mira_engine/providers/litellm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from loguru import logger

from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from mira_engine.providers.model_compat import model_supports_temperature
from mira_engine.providers.registry import find_by_model, find_gateway

# Standard chat-completion message keys.
Expand Down Expand Up @@ -247,6 +248,14 @@ async def chat(
"temperature": temperature,
}

# Strip `temperature` for models that reject it (e.g.
# ``azure/anthropic/claude-opus-4-7`` returns
# ``invalid_request_error: \`temperature\` is deprecated for this model.``).
# Done before _apply_model_overrides so a registry override can still
# re-add it intentionally if some future provider needs that.
if not model_supports_temperature(original_model) or not model_supports_temperature(model):
kwargs.pop("temperature", None)

# Apply model-specific overrides (e.g. kimi-k2.5 temperature)
self._apply_model_overrides(model, kwargs)

Expand Down
44 changes: 44 additions & 0 deletions mira_engine/providers/model_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
r"""Model-level compatibility flags shared across providers.

Some newer LLMs (Anthropic Claude Opus 4-7, several OpenAI reasoning
deployments, etc.) reject the ``temperature`` parameter outright. The
property is **model-scoped**, not provider-scoped: the same model is
exposed by Anthropic directly, by Azure AI's Anthropic deployment, by
AiHubMix/OpenRouter gateways, etc., and every path must drop the
parameter or the request 400s.

Keeping the rule in one place avoids the failure mode we observed in
production where ``azure/anthropic/claude-opus-4-7`` repeatedly errored
with ``\`temperature\` is deprecated for this model.`` while every
provider builder still attached ``temperature``.

To extend, add a substring token to ``TEMPERATURE_UNSUPPORTED_MODEL_TOKENS``.
"""

from __future__ import annotations

# Substrings (case-insensitive) that identify models which reject
# `temperature`. Match is intentionally loose so it catches the model
# under every provider/gateway prefix (``anthropic/...``,
# ``azure/anthropic/...``, ``openrouter/anthropic/...``, etc.).
TEMPERATURE_UNSUPPORTED_MODEL_TOKENS: frozenset[str] = frozenset(
    {
        # Claude Opus 4-7 on Azure AI rejects `temperature` with
        # `invalid_request_error: temperature is deprecated for this model.`
        # The same model on the native Anthropic API still accepts it today,
        # but stripping it everywhere is safe (Anthropic defaults to 1.0).
        "claude-opus-4-7",
    }
)


def model_supports_temperature(model: str | None) -> bool:
    """Return True when the model is expected to accept ``temperature``.

    Empty / unknown model strings default to True so we don't accidentally
    suppress the parameter for ordinary models.

    Args:
        model: Model or deployment identifier, with or without
            provider/gateway prefixes. ``None`` and ``""`` are allowed.

    Returns:
        ``False`` when the lower-cased identifier contains any token from
        ``TEMPERATURE_UNSUPPORTED_MODEL_TOKENS``; ``True`` otherwise.
    """
    if not model:
        return True
    # Tokens are stored in lower case; lower the identifier once so the
    # substring match ignores case.
    lowered = model.lower()
    return not any(token in lowered for token in TEMPERATURE_UNSUPPORTED_MODEL_TOKENS)
11 changes: 9 additions & 2 deletions mira_engine/providers/openai_compat_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from openai import AsyncOpenAI

from mira_engine.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from mira_engine.providers.model_compat import model_supports_temperature
from mira_engine.providers.openai_responses import (
consume_sdk_stream,
convert_messages,
Expand Down Expand Up @@ -263,11 +264,17 @@ def _supports_temperature(
) -> bool:
"""Return True when the model accepts a temperature parameter.

GPT-5 family and reasoning models (o1/o3/o4) reject temperature
when reasoning_effort is set to anything other than ``"none"``.
Combines two rule sets:
- GPT-5 / o1 / o3 / o4 deployments (and any non-``"none"``
``reasoning_effort``) reject temperature.
- Models flagged in :mod:`providers.model_compat` (e.g.
Azure-proxied ``claude-opus-4-7``) reject temperature
regardless of the OpenAI-compatible front-end.
"""
if reasoning_effort and reasoning_effort.lower() != "none":
return False
if not model_supports_temperature(model_name):
return False
name = model_name.lower()
return not any(token in name for token in ("gpt-5", "o1", "o3", "o4"))

Expand Down
32 changes: 32 additions & 0 deletions tests/providers/test_azure_openai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,38 @@ def test_supports_temperature_with_reasoning_effort():
assert AzureOpenAIProvider._supports_temperature("gpt-4o", reasoning_effort="medium") is False


def test_supports_temperature_blocks_claude_opus_4_7():
    """Bug 1 regression: Azure-hosted ``claude-opus-4-7`` deployments reject
    ``temperature`` outright. The blocklist (via providers.model_compat) must
    catch the deployment under every prefix variant.
    """
    variants = (
        "claude-opus-4-7",
        "azure/anthropic/claude-opus-4-7",
        "anthropic/claude-opus-4-7",
    )
    for deployment in variants:
        # The message names the variant so a failure is self-explanatory.
        assert AzureOpenAIProvider._supports_temperature(deployment) is False, deployment


def test_build_body_drops_temperature_for_claude_opus_4_7():
    """The Responses API body must NOT carry ``temperature`` when the model
    is on the blocklist — otherwise Azure returns ``invalid_request_error``
    and the agent's auto-run loop wedges.
    """
    provider = AzureOpenAIProvider(
        api_key="k",
        api_base="https://r.openai.azure.com",
        default_model="claude-opus-4-7",
    )
    # A non-default temperature (0.7) is passed on purpose: the assertion
    # below only means something if the caller actually asked for one.
    body = provider._build_body(
        [{"role": "user", "content": "hi"}],
        None,
        "azure/anthropic/claude-opus-4-7",
        4096,
        0.7,
        None,
        None,
    )
    assert "temperature" not in body


# ---------------------------------------------------------------------------
# _build_body — Responses API body construction
# ---------------------------------------------------------------------------
Expand Down
53 changes: 53 additions & 0 deletions tests/providers/test_litellm_kwargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,59 @@ def test_openai_compat_supports_temperature_matches_reasoning_model_rules() -> N
assert OpenAICompatProvider._supports_temperature("gpt-4o", reasoning_effort="medium") is False


def test_openai_compat_supports_temperature_blocks_claude_opus_4_7() -> None:
    """Bug 1 regression: OpenAI-compatible custom endpoints that proxy to
    Azure-hosted ``claude-opus-4-7`` must drop ``temperature`` — otherwise
    Azure returns ``invalid_request_error`` with the message that
    ``temperature`` is deprecated for this model.
    """
    variants = (
        "claude-opus-4-7",
        "azure/anthropic/claude-opus-4-7",
        "anthropic/claude-opus-4-7",
    )
    for model_name in variants:
        # The message names the variant so a failure is self-explanatory.
        assert OpenAICompatProvider._supports_temperature(model_name) is False, model_name


def test_openai_compat_build_kwargs_drops_temperature_for_claude_opus_4_7() -> None:
    """``_build_kwargs`` must omit ``temperature`` for ``claude-opus-4-7``
    even when the caller passes an explicit value.
    """
    spec = find_by_name("custom")
    # Patch the SDK client class while the provider is constructed.
    with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
        provider = OpenAICompatProvider(
            api_key="any",
            default_model="azure/anthropic/claude-opus-4-7",
            spec=spec,
        )

    kwargs = provider._build_kwargs(
        messages=[{"role": "user", "content": "hi"}],
        tools=None,
        model="azure/anthropic/claude-opus-4-7",
        max_tokens=512,
        temperature=0.7,
        reasoning_effort=None,
        tool_choice=None,
    )
    assert "temperature" not in kwargs


def test_openai_compat_build_responses_body_drops_temperature_for_claude_opus_4_7() -> None:
    """``_build_responses_body`` must omit ``temperature`` for
    ``claude-opus-4-7`` even when the caller passes an explicit value.
    """
    spec = find_by_name("custom")
    # Patch the SDK client class while the provider is constructed.
    with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
        provider = OpenAICompatProvider(
            api_key="any",
            default_model="azure/anthropic/claude-opus-4-7",
            spec=spec,
        )

    body = provider._build_responses_body(
        messages=[{"role": "user", "content": "hi"}],
        tools=None,
        model="azure/anthropic/claude-opus-4-7",
        max_tokens=512,
        temperature=0.7,
        reasoning_effort=None,
        tool_choice=None,
    )
    assert "temperature" not in body


def test_openai_compat_build_kwargs_uses_gpt5_safe_parameters() -> None:
spec = find_by_name("openai")
with patch("mira_engine.providers.openai_compat_provider.AsyncOpenAI"):
Expand Down
39 changes: 39 additions & 0 deletions tests/providers/test_model_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Tests for the shared model-compatibility helper."""

from __future__ import annotations

from mira_engine.providers.model_compat import (
TEMPERATURE_UNSUPPORTED_MODEL_TOKENS,
model_supports_temperature,
)


def test_ordinary_models_support_temperature():
    """Models that match no blocklist token keep ``temperature``."""
    for model in ("anthropic/claude-sonnet-4-5", "openai/gpt-4o", "gpt-4.1-mini"):
        # The message names the model so a failure is self-explanatory.
        assert model_supports_temperature(model) is True, model


def test_empty_or_none_model_defaults_to_supported():
    """Missing model identifiers are treated as supporting ``temperature``."""
    # We don't want to silently strip temperature for unknown models —
    # only an explicit token match disables it.
    assert model_supports_temperature(None) is True
    assert model_supports_temperature("") is True


def test_claude_opus_4_7_is_blocked_under_every_provider_prefix():
    """``claude-opus-4-7`` is blocked under any prefix and letter case."""
    # Bug 1: in production we observed `azure/anthropic/claude-opus-4-7`
    # rejecting `temperature` with
    # invalid_request_error: `temperature` is deprecated for this model.
    # The token rule must catch the model under every prefix variant.
    variants = (
        "claude-opus-4-7",
        "anthropic/claude-opus-4-7",
        "azure/anthropic/claude-opus-4-7",
        "openrouter/anthropic/claude-opus-4-7",
        "Azure/Anthropic/Claude-Opus-4-7",
    )
    for model in variants:
        # The message names the variant so a failure is self-explanatory.
        assert model_supports_temperature(model) is False, model


def test_blocklist_is_a_frozenset():
    """The blocklist is immutable and contains the known offending model."""
    # Guards against accidental in-place mutation from another module.
    assert isinstance(TEMPERATURE_UNSUPPORTED_MODEL_TOKENS, frozenset)
    assert "claude-opus-4-7" in TEMPERATURE_UNSUPPORTED_MODEL_TOKENS
Loading
Loading