diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 0cc9226..84ff6c7 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -997,6 +997,7 @@ export interface SystemConfigValues {
   llm_api_key?: string | null
   llm_model?: string | null
   llm_timeout_seconds: number
+  llm_temperature?: number | null
   llm_allow_mock_fallback: boolean
   llm_secondary_api_base?: string | null
   llm_secondary_api_key?: string | null
diff --git a/frontend/src/views/staff/StaffDashboard.test.tsx b/frontend/src/views/staff/StaffDashboard.test.tsx
index b6a2e64..85e43b9 100644
--- a/frontend/src/views/staff/StaffDashboard.test.tsx
+++ b/frontend/src/views/staff/StaffDashboard.test.tsx
@@ -73,6 +73,7 @@ describe('StaffDashboard', () => {
     llm_api_key: 'sk-test',
     llm_model: 'kimi-k2.5',
     llm_timeout_seconds: 20,
+    llm_temperature: null,
     llm_allow_mock_fallback: false,
     llm_secondary_api_base: null,
     llm_secondary_api_key: null,
diff --git a/frontend/src/views/staff/SystemConfig.test.tsx b/frontend/src/views/staff/SystemConfig.test.tsx
index fde7625..cf78538 100644
--- a/frontend/src/views/staff/SystemConfig.test.tsx
+++ b/frontend/src/views/staff/SystemConfig.test.tsx
@@ -49,6 +49,7 @@ function buildValues(): SystemConfigValues {
     llm_api_key: 'sk-test',
     llm_model: 'kimi-k2.5',
     llm_timeout_seconds: 20,
+    llm_temperature: null,
     llm_allow_mock_fallback: false,
     llm_secondary_api_base: null,
     llm_secondary_api_key: null,
diff --git a/frontend/src/views/staff/systemConfigSchema.ts b/frontend/src/views/staff/systemConfigSchema.ts
index 9865ac3..78b9268 100644
--- a/frontend/src/views/staff/systemConfigSchema.ts
+++ b/frontend/src/views/staff/systemConfigSchema.ts
@@ -121,6 +121,14 @@ export const systemConfigSections: SystemConfigSectionDefinition[] = [
       input: 'number',
       step: '0.1',
     },
+    {
+      key: 'llm_temperature',
+      label: 'LLM temperature',
+      description: 'Sampling temperature sent to the LLM. Some models (e.g. kimi-k2.5) only accept 1. Leave blank for the default (0.2).',
+      input: 'number',
+      step: '0.1',
+      placeholder: '0.2',
+    },
     {
       key: 'llm_allow_mock_fallback',
       label: 'Allow mock fallback',
diff --git a/src/dibble/config.py b/src/dibble/config.py
index 1f0f97b..346ab5e 100644
--- a/src/dibble/config.py
+++ b/src/dibble/config.py
@@ -30,6 +30,7 @@ class Settings:
     llm_api_key: str | None = None
     llm_model: str | None = None
     llm_timeout_seconds: float = 20.0
+    llm_temperature: float | None = None
     llm_allow_mock_fallback: bool = True
     llm_secondary_api_base: str | None = None
     llm_secondary_api_key: str | None = None
diff --git a/src/dibble/models/admin.py b/src/dibble/models/admin.py
index ef2232a..7d4f258 100644
--- a/src/dibble/models/admin.py
+++ b/src/dibble/models/admin.py
@@ -17,6 +17,7 @@ class SystemConfigValues(BaseModel):
     llm_api_key: str | None = None
     llm_model: str | None = None
     llm_timeout_seconds: float
+    llm_temperature: float | None = None
     llm_allow_mock_fallback: bool
     llm_secondary_api_base: str | None = None
     llm_secondary_api_key: str | None = None
@@ -64,6 +65,7 @@ class SystemConfigUpdateRequest(BaseModel):
     llm_api_key: str | None = None
     llm_model: str | None = None
     llm_timeout_seconds: float | None = None
+    llm_temperature: float | None = None
     llm_allow_mock_fallback: bool | None = None
     llm_secondary_api_base: str | None = None
     llm_secondary_api_key: str | None = None
diff --git a/src/dibble/services/llm_client.py b/src/dibble/services/llm_client.py
index 3dc92ed..ce6808d 100644
--- a/src/dibble/services/llm_client.py
+++ b/src/dibble/services/llm_client.py
@@ -68,6 +68,8 @@ def post_event_stream(
 
 
 class OpenAICompatibleChatClient:
+    DEFAULT_TEMPERATURE = 0.2
+
     def __init__(
         self,
         *,
@@ -75,6 +77,7 @@ def __init__(
         api_key: str,
         model: str,
         timeout_seconds: float = 20.0,
+        temperature: float | None = None,
         transport: Transport = post_json,
         stream_transport: StreamTransport = post_event_stream,
     ) -> None:
@@ -82,16 +85,19 @@ def __init__(
         self.api_key = api_key
         self.model = model
         self.timeout_seconds = timeout_seconds
+        self.default_temperature = (
+            temperature if temperature is not None else self.DEFAULT_TEMPERATURE
+        )
         self.transport = transport
         self.stream_transport = stream_transport
 
     def complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ) -> LLMCompletion:
         response = self._complete_with_temperature(
             system_prompt=system_prompt,
             user_prompt=user_prompt,
-            temperature=temperature,
+            temperature=temperature if temperature is not None else self.default_temperature,
         )
         return LLMCompletion(
             content=self._extract_content(response),
@@ -104,12 +110,12 @@ def stream_complete(
         *,
         system_prompt: str,
         user_prompt: str,
-        temperature: float = 0.2,
+        temperature: float | None = None,
     ) -> Iterator[str]:
         for line in self._stream_lines_with_temperature(
             system_prompt=system_prompt,
             user_prompt=user_prompt,
-            temperature=temperature,
+            temperature=temperature if temperature is not None else self.default_temperature,
         ):
             stripped = line.strip()
             if not stripped or not stripped.startswith("data: "):
diff --git a/src/dibble/services/llm_provider.py b/src/dibble/services/llm_provider.py
index 03a8ce7..02aa38c 100644
--- a/src/dibble/services/llm_provider.py
+++ b/src/dibble/services/llm_provider.py
@@ -44,6 +44,7 @@ class LLMProviderConfig:
     api_key: str | None
     model: str | None
     timeout_seconds: float
+    temperature: float | None = None
     allow_mock_fallback: bool = True
 
 
@@ -66,6 +67,7 @@ def build_llm_clients(
             api_key=settings.llm_api_key,
             model=settings.llm_model,
             timeout_seconds=settings.llm_timeout_seconds,
+            temperature=settings.llm_temperature,
             allow_mock_fallback=settings.llm_allow_mock_fallback,
         ),
         LLMProviderConfig(
@@ -75,6 +77,7 @@ def build_llm_clients(
             model=settings.llm_secondary_model,
             timeout_seconds=settings.llm_secondary_timeout_seconds
             or settings.llm_timeout_seconds,
+            temperature=settings.llm_temperature,
             allow_mock_fallback=settings.llm_allow_mock_fallback,
         ),
     ]
@@ -91,6 +94,7 @@ def build_llm_clients(
                 api_key=config.api_key,
                 model=config.model,
                 timeout_seconds=config.timeout_seconds,
+                temperature=config.temperature,
             ),
         )
     )
diff --git a/tests/test_admin_config.py b/tests/test_admin_config.py
index bdec321..8f2c3a9 100644
--- a/tests/test_admin_config.py
+++ b/tests/test_admin_config.py
@@ -59,6 +59,7 @@ def _make_update_request(**overrides):
         validator_plugin="dibble.plugins.defaults.validator:build",
         llm_api_base="https://api.openai.com/v1",
         llm_timeout_seconds=20.0,
+        llm_temperature=None,
         llm_allow_mock_fallback=True,
         llm_circuit_breaker_threshold=2,
         llm_circuit_breaker_cooldown_seconds=30.0,
diff --git a/tests/test_provider.py b/tests/test_provider.py
index d96be94..4b71336 100644
--- a/tests/test_provider.py
+++ b/tests/test_provider.py
@@ -90,7 +90,7 @@ def __init__(
         self.stream_calls = 0
 
     def complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         self.complete_calls += 1
         if self.clock is not None:
@@ -105,7 +105,7 @@ def __init__(self, content: str) -> None:
         return Result(self.content or "")
 
     def stream_complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         self.stream_calls += 1
         if self.clock is not None:
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
index b78e5ba..91eb577 100644
--- a/tests/test_telemetry.py
+++ b/tests/test_telemetry.py
@@ -22,19 +22,19 @@
 
 class AlwaysFailsClient:
     def complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         raise LLMClientError("boom")
 
     def stream_complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         raise LLMClientError("boom")
 
 
 class SucceedsClient:
     def complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         class Result:
             content = (
@@ -47,7 +47,7 @@ class Result:
         return Result()
 
     def stream_complete(
-        self, *, system_prompt: str, user_prompt: str, temperature: float = 0.2
+        self, *, system_prompt: str, user_prompt: str, temperature: float | None = None
     ):
         yield '{"block_index":0,"kind":"summary","title":"Backup","body_delta":"Recovered output.","done":true}\n'