diff --git a/mira_engine/providers/litellm_provider.py b/mira_engine/providers/litellm_provider.py index cceabb3..76255f6 100644 --- a/mira_engine/providers/litellm_provider.py +++ b/mira_engine/providers/litellm_provider.py @@ -325,12 +325,41 @@ async def chat( response = await acompletion(**kwargs) return self._parse_response(response) except Exception as e: - # Return error as content for graceful handling + # Some providers (notably Moonshot's Kimi K2 / thinking models) + # enforce temperature=1 and reject any other value with a 400. + # Retry once with temperature=1.0 instead of bubbling the error up, + # so newly released models we haven't registered overrides for + # still succeed. + if self._is_temperature_one_required(e) and kwargs.get("temperature") != 1.0: + logger.warning( + "Provider rejected temperature={}; retrying with temperature=1.0 (model={})", + kwargs.get("temperature"), kwargs.get("model"), + ) + kwargs["temperature"] = 1.0 + try: + response = await acompletion(**kwargs) + return self._parse_response(response) + except Exception as retry_err: + e = retry_err + return LLMResponse( content=f"Error calling LLM: {str(e)}", finish_reason="error", ) + @staticmethod + def _is_temperature_one_required(err: Exception) -> bool: + """Detect provider errors that demand temperature=1 (e.g. Moonshot Kimi K2).""" + msg = str(err).lower() + if "temperature" not in msg: + return False + return ( + "only 1 is allowed" in msg + or "only 1.0 is allowed" in msg + or "must be 1" in msg + or "temperature=1" in msg + ) + def _parse_response(self, response: Any) -> LLMResponse: """Parse LiteLLM response into our standard format.""" choice = response.choices[0] diff --git a/mira_engine/providers/registry.py b/mira_engine/providers/registry.py index 39b6256..44f116c 100644 --- a/mira_engine/providers/registry.py +++ b/mira_engine/providers/registry.py @@ -359,7 +359,12 @@ def label(self) -> str: ), # Moonshot: Kimi models, needs "moonshot/" prefix. # LiteLLM requires MOONSHOT_API_BASE env var to find the endpoint. - # Kimi K2.5 API enforces temperature >= 1.0. + # Moonshot enforces temperature=1 on the entire Kimi K2 family + # (kimi-k2, kimi-k2-turbo, kimi-k2.5, kimi-k2.5-turbo, ...) and on + # the thinking/reasoning preview models (kimi-thinking-preview, + # kimi-k2-thinking, ...). Catch both prefixes so new releases stay covered. + # LiteLLMProvider.chat() also retries once with temperature=1.0 if the + # server still rejects the request, as a defense against future variants. ProviderSpec( name="moonshot", keywords=("moonshot", "kimi"), @@ -374,7 +379,10 @@ def label(self) -> str: detect_by_base_keyword="", default_api_base="https://api.moonshot.ai/v1", # intl; use api.moonshot.cn for China strip_model_prefix=False, - model_overrides=(("kimi-k2.5", {"temperature": 1.0}),), + model_overrides=( + ("kimi-k2", {"temperature": 1.0}), + ("kimi-thinking", {"temperature": 1.0}), + ), ), # MiniMax: needs "minimax/" prefix for LiteLLM routing. # Uses OpenAI-compatible API at api.minimax.io/v1. diff --git a/tests/providers/test_litellm_provider.py b/tests/providers/test_litellm_provider.py index 5279d1b..2da36d6 100644 --- a/tests/providers/test_litellm_provider.py +++ b/tests/providers/test_litellm_provider.py @@ -1,6 +1,9 @@ from __future__ import annotations from types import SimpleNamespace +from unittest.mock import AsyncMock, patch + +import pytest from mira_engine.providers.litellm_provider import LiteLLMProvider @@ -27,3 +30,104 @@ def test_litellm_parse_preserves_reasoning_from_provider_fields() -> None: assert result.content == "final answer" assert result.reasoning_content == "hidden reasoning" assert result.thinking_blocks == [{"type": "thinking", "thinking": "hidden"}] + + +# --------------------------------------------------------------------------- +# Moonshot / Kimi temperature=1 enforcement +# --------------------------------------------------------------------------- + + +def test_moonshot_k2_family_gets_temperature_override() -> None: + """All kimi-k2* and kimi-thinking* names should be clamped to temperature=1.0.""" + provider = LiteLLMProvider(default_model="kimi-k2-turbo-preview") + for model in ( + "moonshot/kimi-k2", + "moonshot/kimi-k2-turbo", + "moonshot/kimi-k2-turbo-preview", + "moonshot/kimi-k2.5", + "moonshot/kimi-k2.5-turbo", + "moonshot/kimi-thinking-preview", + ): + kwargs = {"temperature": 0.7} + provider._apply_model_overrides(model, kwargs) + assert kwargs["temperature"] == 1.0, f"{model} should be clamped to 1.0" + + +def test_moonshot_v1_models_keep_caller_temperature() -> None: + """Plain moonshot-v1-* chat models accept any temperature; no override.""" + provider = LiteLLMProvider(default_model="moonshot-v1-128k") + kwargs = {"temperature": 0.3} + provider._apply_model_overrides("moonshot/moonshot-v1-128k", kwargs) + assert kwargs["temperature"] == 0.3 + + +@pytest.mark.parametrize( + "message", + [ + "MoonshotException - invalid temperature: only 1 is allowed for this model", + "Bad temperature, only 1.0 is allowed", + "temperature must be 1", + ], +) +def test_is_temperature_one_required_recognizes_provider_messages(message: str) -> None: + err = RuntimeError(message) + assert LiteLLMProvider._is_temperature_one_required(err) is True + + +def test_is_temperature_one_required_ignores_unrelated_errors() -> None: + assert LiteLLMProvider._is_temperature_one_required(RuntimeError("rate limit")) is False + assert LiteLLMProvider._is_temperature_one_required(RuntimeError("temperature too high")) is False + + +@pytest.mark.asyncio +async def test_chat_retries_with_temperature_one_on_provider_rejection() -> None: + """If the API rejects the request demanding temperature=1, retry once with 1.0.""" + success_message = SimpleNamespace(content="ok", tool_calls=None, provider_specific_fields=None) + success_response = _fake_response(success_message) + + mock_acompletion = AsyncMock( + side_effect=[ + RuntimeError( + "litellm.BadRequestError: MoonshotException - " + "invalid temperature: only 1 is allowed for this model" + ), + success_response, + ] + ) + + with patch("mira_engine.providers.litellm_provider.acompletion", mock_acompletion): + provider = LiteLLMProvider(default_model="moonshot/kimi-future-model") + result = await provider.chat( + messages=[{"role": "user", "content": "hello"}], + model="moonshot/kimi-future-model", + temperature=0.5, + ) + + assert result.finish_reason == "stop" + assert result.content == "ok" + assert mock_acompletion.await_count == 2 + assert mock_acompletion.await_args_list[0].kwargs["temperature"] == 0.5 + assert mock_acompletion.await_args_list[1].kwargs["temperature"] == 1.0 + + +@pytest.mark.asyncio +async def test_chat_does_not_retry_when_already_at_temperature_one() -> None: + """No infinite retries — if we already sent temperature=1.0, bubble the error up.""" + mock_acompletion = AsyncMock( + side_effect=RuntimeError( + "MoonshotException - invalid temperature: only 1 is allowed for this model" + ) + ) + + with patch("mira_engine.providers.litellm_provider.acompletion", mock_acompletion): + provider = LiteLLMProvider(default_model="moonshot/kimi-k2-turbo") + result = await provider.chat( + messages=[{"role": "user", "content": "hello"}], + model="moonshot/kimi-k2-turbo", + temperature=0.5, # will be overridden to 1.0 by registry → no retry + ) + + assert result.finish_reason == "error" + assert "only 1 is allowed" in result.content + assert mock_acompletion.await_count == 1 + assert mock_acompletion.await_args_list[0].kwargs["temperature"] == 1.0