
Commit 218728e

Refine LLM profile persistence
- move inline/profile compaction into LLM serializer/validator
- use model_dump_json context in ConversationState persistence
- add persistence settings module and cover profile reference tests
- document persistence comparison and recommendations

7 files changed (+139 −121 lines)

docs/llm_profiles.md — 19 additions, 0 deletions
### Follow-up coordination

- Subsequent tasks (agent-sdk-20/21/22) will build on this foundation to expose CLI flags, update documentation, and improve secrets handling.

## Persistence integration review

### Conversation snapshots vs. profile-aware serialization

- **Caller experience:** Conversations that opt into profile references should behave the same as the legacy inline flow. Callers still receive fully expanded `LLM` payloads when they work with `ConversationState` objects or remote conversation APIs. The only observable change is that persisted `base_state.json` files can shrink to `{ "profile_id": "<name>" }` instead of storing every field (see the sketch after this list).
- **Inline vs. referenced storage:** Conversation persistence previously delegated everything to Pydantic (`model_dump_json` / `model_validate`). The draft implementation added a recursive helper pair (`compact_llm_profiles` / `resolve_llm_profiles`) that walked arbitrary dictionaries and manually replaced or expanded embedded LLMs. This duplication diverged from the rest of the SDK, where polymorphic models rely on validators and discriminators to control serialization.
- **Relationship to `DiscriminatedUnionMixin`:** That mixin exists so we can ship objects across process boundaries (e.g., remote conversations) without bespoke traversal code. Keeping serialization rules on the models themselves, rather than sprinkling special cases in persistence helpers, lets us benefit from the same rebuild/validation pipeline.
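For illustration, a minimal sketch of the two snapshot shapes; the profile name and field values are hypothetical, and the inline form carries every `LLM` field:

```python
# Hypothetical LLM snippets as they would appear inside base_state.json.

# Inline (legacy/default): the complete LLM is embedded in the snapshot.
inline_snapshot = {
    "model": "gpt-4o-mini",
    "usage_id": "agent",
    # ...every other LLM field, fully expanded...
}

# Profile-backed: only the reference is stored; it is re-expanded on load.
referenced_snapshot = {"profile_id": "my-profile"}
```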
### Remote conversation compatibility

- The agent server still exposes fully inlined LLM payloads to remote clients. Because the manual compaction was only invoked when writing `base_state.json`, remote APIs were unaffected. We need to preserve that behaviour so remote callers do not have to resolve profiles themselves.
- When a conversation is restored on the server (or locally), any profile references in `base_state.json` must be expanded **before** the state is materialised; otherwise, components that expect a concrete `LLM` instance (e.g., secret reconciliation, spend tracking) will break. A restore-time sketch follows this list.
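A minimal sketch of that restore-time expansion, assuming a registry that already holds a profile named `sample` (the profile name and `profile_dir` are illustrative):

```python
from openhands.sdk.llm.llm import LLM
from openhands.sdk.llm.llm_registry import LLMRegistry
from openhands.sdk.persistence.settings import INLINE_CONTEXT_KEY

registry = LLMRegistry(profile_dir="/path/to/profiles")  # illustrative path
registry.save_profile("sample", LLM(model="gpt-4o-mini", usage_id="service"))

# A bare reference, as it would be read back from base_state.json.
llm = LLM.model_validate(
    {"profile_id": "sample"},
    context={INLINE_CONTEXT_KEY: False, "llm_registry": registry},
)
assert llm.model == "gpt-4o-mini"  # concrete before any component touches it
```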
### Recommendation

- Move profile resolution/compaction into the `LLM` model:
  - A `model_validator(mode="before")` can load `{ "profile_id": ... }` payloads with the `LLMRegistry`, while respecting `OPENHANDS_INLINE_CONVERSATIONS` (raise when inline mode is enforced but only a profile reference is available).
  - A wrap-mode `model_serializer` (applied when dumping to JSON) can honour the same inline flag via `model_dump(..., context={"inline_llm_persistence": bool})`, returning either the full inline payload or a `{ "profile_id": ... }` stub. Callers that do not provide explicit context will continue to receive inline payloads by default.
- Have `ConversationState._save_base_state` call `model_dump_json` with the appropriate context instead of the bespoke traversal helpers. This keeps persistence logic co-located with the models, reduces drift, and keeps remote conversations working without additional glue.
- With this approach we still support inline overrides (`OPENHANDS_INLINE_CONVERSATIONS=true`), profile-backed storage, and remote access with no behavioural changes for callers. A round-trip example follows this list.
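A round-trip usage example mirroring the tests added in this commit (`sample` is an illustrative profile name):

```python
from openhands.sdk.llm.llm import LLM
from openhands.sdk.persistence.settings import INLINE_CONTEXT_KEY

llm = LLM(model="gpt-4o-mini", usage_id="service", profile_id="sample")

# Default: callers get the fully inlined payload.
assert llm.model_dump(mode="json")["model"] == "gpt-4o-mini"

# Profile-backed persistence: only the stub is written to disk.
assert llm.model_dump(mode="json", context={INLINE_CONTEXT_KEY: False}) == {
    "profile_id": "sample"
}
```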

openhands-sdk/openhands/sdk/conversation/persistence_utils.py — 0 additions, 106 deletions

This file was deleted.

openhands-sdk/openhands/sdk/conversation/state.py — 10 additions, 12 deletions

```diff
@@ -1,5 +1,4 @@
 # state.py
-import json
 from collections.abc import Sequence
 from enum import Enum
 from typing import TYPE_CHECKING, Any, Self
@@ -11,17 +10,16 @@
 from openhands.sdk.conversation.event_store import EventLog
 from openhands.sdk.conversation.fifo_lock import FIFOLock
 from openhands.sdk.conversation.persistence_const import BASE_STATE, EVENTS_DIR
-from openhands.sdk.conversation.persistence_utils import (
-    compact_llm_profiles,
-    resolve_llm_profiles,
-    should_inline_conversations,
-)
 from openhands.sdk.conversation.secrets_manager import SecretsManager
 from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
 from openhands.sdk.event import ActionEvent, ObservationEvent, UserRejectObservation
 from openhands.sdk.event.base import Event
 from openhands.sdk.io import FileStore, InMemoryFileStore, LocalFileStore
 from openhands.sdk.logger import get_logger
+from openhands.sdk.persistence.settings import (
+    INLINE_CONTEXT_KEY,
+    should_inline_conversations,
+)
 from openhands.sdk.security.confirmation_policy import (
     ConfirmationPolicyBase,
     NeverConfirm,
@@ -139,10 +137,11 @@ def _save_base_state(self, fs: FileStore) -> None:
         Persist base state snapshot (no events; events are file-backed).
         """
         inline_mode = should_inline_conversations()
-        payload = compact_llm_profiles(
-            self.model_dump(mode="json", exclude_none=True), inline=inline_mode
+        payload = self.model_dump_json(
+            exclude_none=True,
+            context={INLINE_CONTEXT_KEY: inline_mode},
         )
-        fs.write(BASE_STATE, json.dumps(payload))
+        fs.write(BASE_STATE, payload)
 
     # ===== Factory: open-or-create (no load/save methods needed) =====
     @classmethod
@@ -170,12 +169,11 @@ def create(
         base_text = None
 
         inline_mode = should_inline_conversations()
+        context = {INLINE_CONTEXT_KEY: inline_mode}
 
         # ---- Resume path ----
         if base_text:
-            raw_payload = json.loads(base_text)
-            payload = resolve_llm_profiles(raw_payload, inline=inline_mode)
-            state = cls.model_validate(payload)
+            state = cls.model_validate_json(base_text, context=context)
 
             # Enforce conversation id match
             if state.id != id:
```
openhands-sdk/openhands/sdk/llm/llm.py — 56 additions, 3 deletions

```diff
@@ -4,7 +4,7 @@
 import json
 import os
 import warnings
-from collections.abc import Callable, Sequence
+from collections.abc import Callable, Mapping, Sequence
 from contextlib import contextmanager
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, get_args, get_origin
 
@@ -16,8 +16,12 @@
     Field,
     PrivateAttr,
     SecretStr,
+    SerializationInfo,
+    SerializerFunctionWrapHandler,
+    ValidationInfo,
     field_serializer,
     field_validator,
+    model_serializer,
     model_validator,
 )
 from pydantic.json_schema import SkipJsonSchema
@@ -75,6 +79,10 @@
 from openhands.sdk.llm.utils.retry_mixin import RetryMixin
 from openhands.sdk.llm.utils.telemetry import Telemetry
 from openhands.sdk.logger import ENV_LOG_DIR, get_logger
+from openhands.sdk.persistence.settings import (
+    INLINE_CONTEXT_KEY,
+    should_inline_conversations,
+)
 
 
 logger = get_logger(__name__)
@@ -267,6 +275,22 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         extra="forbid", arbitrary_types_allowed=True
     )
 
+    @model_serializer(mode="wrap", when_used="json")
+    def _serialize_with_profiles(
+        self, handler: SerializerFunctionWrapHandler, info: SerializationInfo
+    ) -> Mapping[str, Any]:
+        inline_pref = None
+        if info.context is not None and INLINE_CONTEXT_KEY in info.context:
+            inline_pref = info.context[INLINE_CONTEXT_KEY]
+        if inline_pref is None:
+            inline_pref = True
+
+        data = handler(self)
+        profile_id = data.get("profile_id") if isinstance(data, dict) else None
+        if not inline_pref and profile_id:
+            return {"profile_id": profile_id}
+        return data
+
     # =========================================================================
     # Validators
     # =========================================================================
@@ -291,11 +315,40 @@ def _validate_api_key(cls, v):
 
     @model_validator(mode="before")
     @classmethod
-    def _coerce_inputs(cls, data):
-        if not isinstance(data, dict):
+    def _coerce_inputs(cls, data: Any, info: ValidationInfo):
+        if not isinstance(data, Mapping):
             return data
         d = dict(data)
 
+        profile_id = d.get("profile_id")
+        if profile_id and "model" not in d:
+            inline_pref = None
+            if info.context is not None and INLINE_CONTEXT_KEY in info.context:
+                inline_pref = info.context[INLINE_CONTEXT_KEY]
+            if inline_pref is None:
+                inline_pref = should_inline_conversations()
+
+            if inline_pref:
+                raise ValueError(
+                    "Encountered profile reference for LLM while "
+                    "OPENHANDS_INLINE_CONVERSATIONS is enabled. "
+                    "Inline the profile or set "
+                    "OPENHANDS_INLINE_CONVERSATIONS=false."
+                )
+
+            registry = None
+            if info.context is not None:
+                registry = info.context.get("llm_registry")
+            if registry is None:
+                from openhands.sdk.llm.llm_registry import LLMRegistry
+
+                registry = LLMRegistry()
+
+            llm = registry.load_profile(profile_id)
+            expanded = llm.model_dump(exclude_none=True)
+            expanded["profile_id"] = profile_id
+            d.update(expanded)
+
         if "service_id" in d and "usage_id" not in d:
             warnings.warn(
                 SERVICE_ID_DEPRECATION_MSG,
```
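The guard in `_coerce_inputs` also means a bare reference fails fast when inline persistence is requested. A small sketch (`sample` is a hypothetical profile name; Pydantic surfaces the raised `ValueError` as a `ValidationError`, which subclasses it):

```python
import pytest

from openhands.sdk.llm.llm import LLM
from openhands.sdk.persistence.settings import INLINE_CONTEXT_KEY

# With inline persistence enforced, a profile stub cannot be resolved lazily.
with pytest.raises(ValueError, match="OPENHANDS_INLINE_CONVERSATIONS"):
    LLM.model_validate({"profile_id": "sample"}, context={INLINE_CONTEXT_KEY: True})
```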
openhands-sdk/openhands/sdk/persistence/__init__.py — 10 additions, 0 deletions

```diff
@@ -0,0 +1,10 @@
+"""Persistence configuration helpers."""
+
+from .settings import INLINE_CONTEXT_KEY, INLINE_ENV_VAR, should_inline_conversations
+
+
+__all__ = [
+    "INLINE_CONTEXT_KEY",
+    "INLINE_ENV_VAR",
+    "should_inline_conversations",
+]
```
openhands-sdk/openhands/sdk/persistence/settings.py — 17 additions, 0 deletions

```diff
@@ -0,0 +1,17 @@
+"""Shared helpers for SDK persistence configuration."""
+
+from __future__ import annotations
+
+import os
+
+
+INLINE_ENV_VAR = "OPENHANDS_INLINE_CONVERSATIONS"
+INLINE_CONTEXT_KEY = "inline_llm_persistence"
+_FALSE_VALUES = {"0", "false", "no"}
+
+
+def should_inline_conversations() -> bool:
+    """Return True when conversations should be persisted with inline LLM payloads."""
+
+    value = os.getenv(INLINE_ENV_VAR, "true").strip().lower()
+    return value not in _FALSE_VALUES
```
tests/sdk/llm/test_llm_registry_profiles.py — 27 additions, 0 deletions

```diff
@@ -4,6 +4,7 @@
 
 from openhands.sdk.llm.llm import LLM
 from openhands.sdk.llm.llm_registry import LLMRegistry
+from openhands.sdk.persistence.settings import INLINE_CONTEXT_KEY
 
 
 def test_list_profiles_returns_sorted_names(tmp_path):
@@ -86,6 +87,32 @@ def test_register_profiles_skips_invalid_and_duplicate_profiles(tmp_path):
     assert registry.list_usage_ids() == ["shared"]
 
 
+def test_llm_serializer_respects_inline_context():
+    llm = LLM(model="gpt-4o-mini", usage_id="service", profile_id="sample")
+
+    inline_payload = llm.model_dump(mode="json")
+    assert inline_payload["model"] == "gpt-4o-mini"
+
+    referenced = llm.model_dump(mode="json", context={INLINE_CONTEXT_KEY: False})
+    assert referenced == {"profile_id": "sample"}
+
+
+def test_llm_validator_loads_profile_reference(tmp_path, monkeypatch):
+    monkeypatch.setenv("OPENHANDS_INLINE_CONVERSATIONS", "false")
+    registry = LLMRegistry(profile_dir=tmp_path)
+    source_llm = LLM(model="gpt-4o-mini", usage_id="service")
+    registry.save_profile("profile-tests", source_llm)
+
+    parsed = LLM.model_validate(
+        {"profile_id": "profile-tests"},
+        context={INLINE_CONTEXT_KEY: False, "llm_registry": registry},
+    )
+
+    assert parsed.model == source_llm.model
+    assert parsed.profile_id == "profile-tests"
+    assert parsed.usage_id == source_llm.usage_id
+
+
 def test_validate_profile_reports_errors(tmp_path):
     registry = LLMRegistry(profile_dir=tmp_path)
```