Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion mira_engine/providers/litellm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,41 @@ async def chat(
response = await acompletion(**kwargs)
return self._parse_response(response)
except Exception as e:
# Return error as content for graceful handling
# Some providers (notably Moonshot's Kimi K2 / thinking models)
# enforce temperature=1 and reject any other value with a 400.
# Retry once with temperature=1.0 instead of bubbling the error up,
# so newly released models we haven't registered overrides for
# still succeed.
if self._is_temperature_one_required(e) and kwargs.get("temperature") != 1.0:
logger.warning(
"Provider rejected temperature={}; retrying with temperature=1.0 (model={})",
kwargs.get("temperature"), kwargs.get("model"),
)
kwargs["temperature"] = 1.0
try:
response = await acompletion(**kwargs)
return self._parse_response(response)
except Exception as retry_err:
e = retry_err

return LLMResponse(
content=f"Error calling LLM: {str(e)}",
finish_reason="error",
)

@staticmethod
def _is_temperature_one_required(err: Exception) -> bool:
    """Detect provider errors that demand temperature=1 (e.g. Moonshot Kimi K2).

    The decision is a case-insensitive text match on ``str(err)``.

    Args:
        err: The exception raised by the completion call.

    Returns:
        True when the message mentions "temperature" and contains one of the
        phrasings "only 1 is allowed", "must be 1" or "temperature=1", where
        the number is exactly 1 (optionally written 1.0, 1.00, ...);
        False otherwise.
    """
    import re  # local import: the file's top-level import block is not in view

    msg = str(err).lower()
    if "temperature" not in msg:
        return False
    # Plain substring checks such as "must be 1" or "temperature=1" would also
    # fire on "must be 10" or "temperature=1.5". Require the number to be
    # exactly one: "1" or "1.0...", not followed by further digits.
    exactly_one = r"1(?:\.0+)?(?!\.?\d)"
    pattern = (
        rf"only {exactly_one} is allowed"
        rf"|must be {exactly_one}"
        rf"|temperature={exactly_one}"
    )
    return re.search(pattern, msg) is not None

def _parse_response(self, response: Any) -> LLMResponse:
"""Parse LiteLLM response into our standard format."""
choice = response.choices[0]
Expand Down
12 changes: 10 additions & 2 deletions mira_engine/providers/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,12 @@ def label(self) -> str:
),
# Moonshot: Kimi models, needs "moonshot/" prefix.
# LiteLLM requires MOONSHOT_API_BASE env var to find the endpoint.
# Kimi K2.5 API enforces temperature >= 1.0.
# Moonshot enforces temperature=1 on the entire Kimi K2 family
# (kimi-k2, kimi-k2-turbo, kimi-k2.5, kimi-k2.5-turbo, ...) and on
# the thinking/reasoning preview models (kimi-thinking-preview,
# kimi-k2-thinking, ...). Catch both prefixes so new releases stay covered.
# LiteLLMProvider.chat() also retries once with temperature=1.0 if the
# server still rejects the request, as a defense against future variants.
ProviderSpec(
name="moonshot",
keywords=("moonshot", "kimi"),
Expand All @@ -374,7 +379,10 @@ def label(self) -> str:
detect_by_base_keyword="",
default_api_base="https://api.moonshot.ai/v1", # intl; use api.moonshot.cn for China
strip_model_prefix=False,
model_overrides=(("kimi-k2.5", {"temperature": 1.0}),),
model_overrides=(
("kimi-k2", {"temperature": 1.0}),
("kimi-thinking", {"temperature": 1.0}),
),
),
# MiniMax: needs "minimax/" prefix for LiteLLM routing.
# Uses OpenAI-compatible API at api.minimax.io/v1.
Expand Down
104 changes: 104 additions & 0 deletions tests/providers/test_litellm_provider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from types import SimpleNamespace
from unittest.mock import AsyncMock, patch

import pytest

from mira_engine.providers.litellm_provider import LiteLLMProvider

Expand All @@ -27,3 +30,104 @@ def test_litellm_parse_preserves_reasoning_from_provider_fields() -> None:
assert result.content == "final answer"
assert result.reasoning_content == "hidden reasoning"
assert result.thinking_blocks == [{"type": "thinking", "thinking": "hidden"}]


# ---------------------------------------------------------------------------
# Moonshot / Kimi temperature=1 enforcement
# ---------------------------------------------------------------------------


def test_moonshot_k2_family_gets_temperature_override() -> None:
    """Each listed kimi-k2* / kimi-thinking* model must end up with temperature=1.0."""
    clamped_models = (
        "moonshot/kimi-k2",
        "moonshot/kimi-k2-turbo",
        "moonshot/kimi-k2-turbo-preview",
        "moonshot/kimi-k2.5",
        "moonshot/kimi-k2.5-turbo",
        "moonshot/kimi-thinking-preview",
    )
    provider = LiteLLMProvider(default_model="kimi-k2-turbo-preview")
    for model in clamped_models:
        # Fresh kwargs per model so one override cannot leak into the next check.
        call_kwargs = dict(temperature=0.7)
        provider._apply_model_overrides(model, call_kwargs)
        assert call_kwargs["temperature"] == 1.0, f"{model} should be clamped to 1.0"


def test_moonshot_v1_models_keep_caller_temperature() -> None:
    """A moonshot-v1-* model gets no override: the caller's temperature stays as sent."""
    call_kwargs = {"temperature": 0.3}
    v1_provider = LiteLLMProvider(default_model="moonshot-v1-128k")
    v1_provider._apply_model_overrides("moonshot/moonshot-v1-128k", call_kwargs)
    # The value the caller supplied must survive the override pass unchanged.
    assert call_kwargs["temperature"] == 0.3


@pytest.mark.parametrize(
    "message",
    [
        "MoonshotException - invalid temperature: only 1 is allowed for this model",
        "Bad temperature, only 1.0 is allowed",
        "temperature must be 1",
        # The detector also accepts the "temperature=1" phrasing; cover it so
        # all four recognized wordings are exercised.
        "Invalid request: this model requires temperature=1",
    ],
)
def test_is_temperature_one_required_recognizes_provider_messages(message: str) -> None:
    """Each recognized 'temperature must be 1' wording is detected as such."""
    err = RuntimeError(message)
    assert LiteLLMProvider._is_temperature_one_required(err) is True


def test_is_temperature_one_required_ignores_unrelated_errors() -> None:
    """Messages that do not demand temperature=1 must not be flagged."""
    # One message without the word "temperature", one with it but no "=1" demand.
    for text in ("rate limit", "temperature too high"):
        assert LiteLLMProvider._is_temperature_one_required(RuntimeError(text)) is False


@pytest.mark.asyncio
async def test_chat_retries_with_temperature_one_on_provider_rejection() -> None:
    """A rejection demanding temperature=1 leads to exactly one retry sent with 1.0."""
    rejection = RuntimeError(
        "litellm.BadRequestError: MoonshotException - "
        "invalid temperature: only 1 is allowed for this model"
    )
    ok_message = SimpleNamespace(content="ok", tool_calls=None, provider_specific_fields=None)
    # First await raises the rejection; the second returns a normal response.
    completion_mock = AsyncMock(side_effect=[rejection, _fake_response(ok_message)])
    model_name = "moonshot/kimi-future-model"

    with patch("mira_engine.providers.litellm_provider.acompletion", completion_mock):
        provider = LiteLLMProvider(default_model=model_name)
        result = await provider.chat(
            messages=[{"role": "user", "content": "hello"}],
            model=model_name,
            temperature=0.5,
        )

    assert result.finish_reason == "stop"
    assert result.content == "ok"
    assert completion_mock.await_count == 2
    # The original temperature goes out first, then the forced 1.0 on the retry.
    sent_temperatures = [call.kwargs["temperature"] for call in completion_mock.await_args_list]
    assert sent_temperatures == [0.5, 1.0]


@pytest.mark.asyncio
async def test_chat_does_not_retry_when_already_at_temperature_one() -> None:
    """If the request already carried temperature=1.0, the error is returned without a retry."""
    always_rejects = RuntimeError(
        "MoonshotException - invalid temperature: only 1 is allowed for this model"
    )
    completion_mock = AsyncMock(side_effect=always_rejects)
    model_name = "moonshot/kimi-k2-turbo"

    with patch("mira_engine.providers.litellm_provider.acompletion", completion_mock):
        provider = LiteLLMProvider(default_model=model_name)
        result = await provider.chat(
            messages=[{"role": "user", "content": "hello"}],
            model=model_name,
            temperature=0.5,  # will be overridden to 1.0 by registry → no retry
        )

    # The failure surfaces as an error response rather than a raised exception.
    assert result.finish_reason == "error"
    assert "only 1 is allowed" in result.content
    # Exactly one attempt was made, and it already used temperature=1.0.
    assert completion_mock.await_count == 1
    only_call = completion_mock.await_args_list[0]
    assert only_call.kwargs["temperature"] == 1.0
Loading