plastic-labs · Oxygen56 · Jun 6, 2026 · Jun 6, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/sdks/python/src/honcho/api_types.py b/sdks/python/src/honcho/api_types.py
@@ -41,6 +41,7 @@ class SummaryConfiguration(BaseModel):
     enabled: bool | None = None
     messages_per_short_summary: int | None = None
     messages_per_long_summary: int | None = None
+    custom_instructions: str | None = None
 
 
 class DreamConfiguration(BaseModel):

diff --git a/src/schemas/configuration.py b/src/schemas/configuration.py
@@ -61,6 +61,15 @@ class SummaryConfiguration(BaseModel):
         ge=20,
         description="Number of messages per long summary. Must be positive, greater than or equal to 20, and greater than messages_per_short_summary.",
     )
+    custom_instructions: str | None = Field(
+        default=None,
+        description="Optional custom instructions for session summaries. Rejected if they exceed the summarizer custom-instruction token cap.",
+    )
+
+    @field_validator("custom_instructions")
+    @classmethod
+    def validate_custom_instructions(cls, value: str | None) -> str | None:
+        return _validate_custom_instructions_budget(value)
 
     @model_validator(mode="after")
     def validate_summary_thresholds(self) -> Self:
@@ -172,6 +181,12 @@ class ResolvedSummaryConfiguration(BaseModel):
     enabled: bool
     messages_per_short_summary: int
     messages_per_long_summary: int
+    custom_instructions: str | None = None
+
+    @field_validator("custom_instructions")
+    @classmethod
+    def validate_custom_instructions(cls, value: str | None) -> str | None:
+        return _validate_custom_instructions_budget(value)
 
 
 class ResolvedDreamConfiguration(BaseModel):

diff --git a/src/utils/summarizer.py b/src/utils/summarizer.py
@@ -14,6 +14,8 @@
 from src.config import ConfiguredModelSettings, settings
 from src.crud.session import session_cache_key
 from src.dependencies import tracked_db
+# TODO: move _custom_instructions_section to shared utility
+from src.deriver.prompts import _custom_instructions_section
 from src.exceptions import ResourceNotFoundException
 from src.llm import HonchoLLMCallResponse, honcho_llm_call
 from src.llm.types import LLMTelemetryContext
@@ -57,6 +59,7 @@ class Summary(TypedDict):
 
 
 def to_schema_summary(s: Summary) -> schemas.Summary:
+    """Convert a Summary TypedDict to a Pydantic Summary schema object."""
     return schemas.Summary(
         content=s["content"],
         message_id=s["message_id"],
@@ -81,6 +84,7 @@ def to_schema_summary(s: Summary) -> schemas.Summary:
 
 
 def _get_summary_model_config() -> ConfiguredModelSettings:
+    """Return the configured model settings for summary generation."""
     return settings.SUMMARY.MODEL_CONFIG
 
 
@@ -101,8 +105,10 @@ def short_summary_prompt(
     formatted_messages: str,
     output_words: int,
     previous_summary_text: str,
+    custom_instructions: str | None = None,
 ) -> str:
     """Generate the short summary prompt."""
+    custom_instructions_section = _custom_instructions_section(custom_instructions)
     return c(f"""
 You are a system that summarizes parts of a conversation to create a concise and accurate summary. Focus on capturing:
 
@@ -117,6 +123,7 @@ def short_summary_prompt(
 
 Return only the summary without any explanation or meta-commentary.
 
+{custom_instructions_section}
 <previous_summary>
 {previous_summary_text}
 </previous_summary>
@@ -133,8 +140,10 @@ def long_summary_prompt(
     formatted_messages: str,
     output_words: int,
     previous_summary_text: str,
+    custom_instructions: str | None = None,
 ) -> str:
     """Generate the long summary prompt."""
+    custom_instructions_section = _custom_instructions_section(custom_instructions)
     return c(f"""
 You are a system that creates thorough, comprehensive summaries of conversations. Focus on capturing:
 
@@ -151,6 +160,7 @@ def long_summary_prompt(
 
 Return only the summary without any explanation or meta-commentary.
 
+{custom_instructions_section}
 <previous_summary>
 {previous_summary_text}
 </previous_summary>
@@ -164,45 +174,60 @@ def long_summary_prompt(
 
 
 @cache
-def estimate_short_summary_prompt_tokens() -> int:
-    """Estimate tokens for the short summary prompt (without messages/previous_summary)."""
+def estimate_short_summary_prompt_tokens(custom_instructions: str | None = None) -> int:
+    """Estimate tokens for the short summary prompt scaffold (no messages/previous summary), plus any custom instructions."""
     try:
         return estimate_tokens(
             short_summary_prompt(
                 formatted_messages="",
                 output_words=0,
                 previous_summary_text="",
+                custom_instructions=custom_instructions,
             )
         )
     except Exception:
-        # Return a rough estimate if estimation fails
         return 200
 
 
 @cache
-def estimate_long_summary_prompt_tokens() -> int:
-    """Estimate tokens for the long summary prompt (without messages/previous_summary)."""
+def estimate_long_summary_prompt_tokens(custom_instructions: str | None = None) -> int:
+    """Estimate tokens for the long summary prompt scaffold (no messages/previous summary), plus any custom instructions."""
     try:
         return estimate_tokens(
             long_summary_prompt(
                 formatted_messages="",
                 output_words=0,
                 previous_summary_text="",
+                custom_instructions=custom_instructions,
             )
         )
     except Exception:
-        # Return a rough estimate if estimation fails
         return 200
 
 
+
 @conditional_observe(name="Create Short Summary")
 async def create_short_summary(
     formatted_messages: str,
     input_tokens: int,
     previous_summary: str | None = None,
+    custom_instructions: str | None = None,
     *,
     workspace_name: str | None = None,
 ) -> HonchoLLMCallResponse[str]:
+    """
+    Generate a short summary via an LLM call.
+
+    Args:
+        formatted_messages: Pre-formatted conversation messages.
+        input_tokens: Token count of the input (messages + previous summary).
+        previous_summary: Previous summary text for continuity, if any.
+        custom_instructions: Optional custom instructions from configuration.
+        workspace_name: Workspace name for telemetry attribution.
+
+    Returns:
+        The LLM response containing the short summary text and token counts.
+    """
     # input_tokens indicates how many tokens the message list + previous summary take up
     # we want to optimize short summaries to be smaller than the actual content being summarized
     # so we ask the agent to produce a word count roughly equal to either the input, or the max
@@ -216,7 +241,10 @@ async def create_short_summary(
         previous_summary_text = "There is no previous summary -- the messages are the beginning of the conversation."
 
     prompt = short_summary_prompt(
-        formatted_messages, output_words, previous_summary_text
+        formatted_messages,
+        output_words,
+        previous_summary_text,
+        custom_instructions=custom_instructions,
     )
 
     return await honcho_llm_call(
@@ -235,9 +263,22 @@ async def create_short_summary(
 async def create_long_summary(
     formatted_messages: str,
     previous_summary: str | None = None,
+    custom_instructions: str | None = None,
     *,
     workspace_name: str | None = None,
 ) -> HonchoLLMCallResponse[str]:
+    """
+    Generate a comprehensive long summary via an LLM call.
+
+    Args:
+        formatted_messages: Pre-formatted conversation messages.
+        previous_summary: Previous summary text for continuity, if any.
+        custom_instructions: Optional custom instructions from configuration.
+        workspace_name: Workspace name for telemetry attribution.
+
+    Returns:
+        The LLM response containing the long summary text and token counts.
+    """
     # the word/token ratio is roughly 4:3 so we multiply by 0.75.
     # LLMs *seem* to respond better to getting asked for a word count but should workshop this.
     output_words = int(settings.SUMMARY.MAX_TOKENS_LONG * 0.75)
@@ -248,7 +289,10 @@ async def create_long_summary(
         previous_summary_text = "There is no previous summary -- the messages are the beginning of the conversation."
 
     prompt = long_summary_prompt(
-        formatted_messages, output_words, previous_summary_text
+        formatted_messages,
+        output_words,
+        previous_summary_text,
+        custom_instructions=custom_instructions,
     )
 
     return await honcho_llm_call(
@@ -439,6 +483,13 @@ async def _create_and_save_summary(
         previous_summary_tokens = latest_summary["token_count"] if latest_summary else 0
         input_tokens = messages_tokens + previous_summary_tokens
 
+    # Extract custom_instructions from the summarizer's own configuration.
+    # This is separate from reasoning custom_instructions — workspace
+    # operators may want summaries in a different style than deriver output.
+    custom_instructions: str | None = None
+    if configuration.summary and configuration.summary.custom_instructions is not None:
+        custom_instructions = configuration.summary.custom_instructions
+
     (
         new_summary,
         is_fallback,
@@ -453,16 +504,21 @@ async def _create_and_save_summary(
         last_message_id=last_message_id,
         last_message_content_preview=last_message_content_preview,
         message_count=message_count,
+        custom_instructions=custom_instructions,
         workspace_name=workspace_name,
     )
 
     # Compute scaffold tokens up front (cheap + idempotent) so both the
     # save-summary path and the telemetry emit below can use it
     # without basedpyright tripping on a possibly-unbound name.
     if summary_type == SummaryType.SHORT:
-        prompt_tokens = estimate_short_summary_prompt_tokens()
+        prompt_tokens = estimate_short_summary_prompt_tokens(
+            custom_instructions
+        )
     else:
-        prompt_tokens = estimate_long_summary_prompt_tokens()
+        prompt_tokens = estimate_long_summary_prompt_tokens(
+            custom_instructions
+        )
 
     # Step 3: Save to database with new transaction
     if not is_fallback:
@@ -552,6 +608,7 @@ async def _create_summary(
     last_message_id: int,
     last_message_content_preview: str,
     message_count: int,
+    custom_instructions: str | None = None,
     *,
     workspace_name: str | None = None,
 ) -> tuple[Summary, bool, int, int]:
@@ -567,6 +624,8 @@ async def _create_summary(
         last_message_id: ID of the last message
         last_message_content_preview: Preview of last message content for fallback
         message_count: Number of messages for fallback
+        custom_instructions: Optional custom instructions from the summary configuration, added to the summary prompt
+        workspace_name: Optional workspace name for telemetry
 
     Returns:
         A tuple of (Summary, is_fallback, llm_input_tokens, llm_output_tokens)
@@ -585,12 +644,14 @@ async def _create_summary(
                 formatted_messages,
                 input_tokens,
                 previous_summary_text,
+                custom_instructions=custom_instructions,
                 workspace_name=workspace_name,
             )
         else:
             response = await create_long_summary(
                 formatted_messages,
                 previous_summary_text,
+                custom_instructions=custom_instructions,
                 workspace_name=workspace_name,
             )