Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdks/python/src/honcho/api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class SummaryConfiguration(BaseModel):
enabled: bool | None = None
messages_per_short_summary: int | None = None
messages_per_long_summary: int | None = None
custom_instructions: str | None = None


class DreamConfiguration(BaseModel):
Expand Down
15 changes: 15 additions & 0 deletions src/schemas/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ class SummaryConfiguration(BaseModel):
ge=20,
description="Number of messages per long summary. Must be positive, greater than or equal to 20, and greater than messages_per_short_summary.",
)
custom_instructions: str | None = Field(
default=None,
description="Optional custom instructions for session summaries. Rejected if they exceed the summarizer custom-instruction token cap.",

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This validates against the deriver's generic MAX_CUSTOM_INSTRUCTIONS_TOKENS cap, not a summarizer-specific one. I think that's fine, but please make that clear in the description.

)

@field_validator("custom_instructions")
@classmethod
def validate_custom_instructions(cls, value: str | None) -> str | None:
return _validate_custom_instructions_budget(value)

@model_validator(mode="after")
def validate_summary_thresholds(self) -> Self:
Expand Down Expand Up @@ -172,6 +181,12 @@ class ResolvedSummaryConfiguration(BaseModel):
enabled: bool
messages_per_short_summary: int
messages_per_long_summary: int
custom_instructions: str | None = None

@field_validator("custom_instructions")
@classmethod
def validate_custom_instructions(cls, value: str | None) -> str | None:
return _validate_custom_instructions_budget(value)


class ResolvedDreamConfiguration(BaseModel):
Expand Down
81 changes: 71 additions & 10 deletions src/utils/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from src.config import ConfiguredModelSettings, settings
from src.crud.session import session_cache_key
from src.dependencies import tracked_db
# TODO: move _custom_instructions_section into a shared utility module so the summarizer no longer imports a private helper from src.deriver.prompts
from src.deriver.prompts import _custom_instructions_section
from src.exceptions import ResourceNotFoundException
from src.llm import HonchoLLMCallResponse, honcho_llm_call
from src.llm.types import LLMTelemetryContext
Expand Down Expand Up @@ -57,6 +59,7 @@ class Summary(TypedDict):


def to_schema_summary(s: Summary) -> schemas.Summary:
"""Convert a Summary TypedDict to a Pydantic Summary schema object."""
return schemas.Summary(
content=s["content"],
message_id=s["message_id"],
Expand All @@ -81,6 +84,7 @@ def to_schema_summary(s: Summary) -> schemas.Summary:


def _get_summary_model_config() -> ConfiguredModelSettings:
    """Look up the model settings that summary generation is configured to use."""
    summary_settings = settings.SUMMARY
    return summary_settings.MODEL_CONFIG


Expand All @@ -101,8 +105,10 @@ def short_summary_prompt(
formatted_messages: str,
output_words: int,
previous_summary_text: str,
custom_instructions: str | None = None,
) -> str:
"""Generate the short summary prompt."""
custom_instructions_section = _custom_instructions_section(custom_instructions)
return c(f"""
You are a system that summarizes parts of a conversation to create a concise and accurate summary. Focus on capturing:

Expand All @@ -117,6 +123,7 @@ def short_summary_prompt(

Return only the summary without any explanation or meta-commentary.

{custom_instructions_section}
<previous_summary>
{previous_summary_text}
</previous_summary>
Expand All @@ -133,8 +140,10 @@ def long_summary_prompt(
formatted_messages: str,
output_words: int,
previous_summary_text: str,
custom_instructions: str | None = None,
) -> str:
"""Generate the long summary prompt."""
custom_instructions_section = _custom_instructions_section(custom_instructions)
return c(f"""
You are a system that creates thorough, comprehensive summaries of conversations. Focus on capturing:

Expand All @@ -151,6 +160,7 @@ def long_summary_prompt(

Return only the summary without any explanation or meta-commentary.

{custom_instructions_section}
<previous_summary>
{previous_summary_text}
</previous_summary>
Expand All @@ -164,45 +174,60 @@ def long_summary_prompt(


@cache
def estimate_short_summary_prompt_tokens(custom_instructions: str | None = None) -> int:
    """
    Estimate the token count of the short summary prompt scaffold.

    The prompt is rendered with empty messages, an empty previous summary and
    a zero word budget, so the estimate covers the fixed prompt text plus
    whatever the custom instructions add to it.

    Args:
        custom_instructions: Optional custom instructions to render into the
            prompt before estimating.

    Returns:
        The estimated token count, or 200 as a rough fallback if estimation fails.
    """
    # NOTE(review): results are memoized per distinct custom_instructions value.
    # Presumably `cache` is functools.cache, which is unbounded -- confirm the
    # set of distinct instruction strings stays small, or use a bounded cache.
    try:
        return estimate_tokens(
            short_summary_prompt(
                formatted_messages="",
                output_words=0,
                previous_summary_text="",
                custom_instructions=custom_instructions,
            )
        )
    except Exception:
        # Return a rough estimate if estimation fails
        return 200


@cache
def estimate_long_summary_prompt_tokens(custom_instructions: str | None = None) -> int:
    """
    Estimate the token count of the long summary prompt scaffold.

    The prompt is rendered with empty messages, an empty previous summary and
    a zero word budget, so the estimate covers the fixed prompt text plus
    whatever the custom instructions add to it.

    Args:
        custom_instructions: Optional custom instructions to render into the
            prompt before estimating.

    Returns:
        The estimated token count, or 200 as a rough fallback if estimation fails.
    """
    # NOTE(review): results are memoized per distinct custom_instructions value.
    # Presumably `cache` is functools.cache, which is unbounded -- confirm the
    # set of distinct instruction strings stays small, or use a bounded cache.
    try:
        return estimate_tokens(
            long_summary_prompt(
                formatted_messages="",
                output_words=0,
                previous_summary_text="",
                # Forward the caller's instructions; passing None here would
                # silently leave them out of the estimate.
                custom_instructions=custom_instructions,
            )
        )
    except Exception:
        # Return a rough estimate if estimation fails
        return 200



@conditional_observe(name="Create Short Summary")
async def create_short_summary(
formatted_messages: str,
input_tokens: int,
previous_summary: str | None = None,
custom_instructions: str | None = None,
*,
workspace_name: str | None = None,
) -> HonchoLLMCallResponse[str]:
"""
Generate a short summary via an LLM call.

Args:
formatted_messages: Pre-formatted conversation messages.
input_tokens: Token count of the input (messages + previous summary).
previous_summary: Previous summary text for continuity, if any.
custom_instructions: Optional custom instructions from configuration.
workspace_name: Workspace name for telemetry attribution.

Returns:
The LLM response containing the short summary text and token counts.
"""
# input_tokens indicates how many tokens the message list + previous summary take up
# we want to optimize short summaries to be smaller than the actual content being summarized
# so we ask the agent to produce a word count roughly equal to either the input, or the max
Expand All @@ -216,7 +241,10 @@ async def create_short_summary(
previous_summary_text = "There is no previous summary -- the messages are the beginning of the conversation."

prompt = short_summary_prompt(
formatted_messages, output_words, previous_summary_text
formatted_messages,
output_words,
previous_summary_text,
custom_instructions=custom_instructions,
)

return await honcho_llm_call(
Expand All @@ -235,9 +263,22 @@ async def create_short_summary(
async def create_long_summary(
formatted_messages: str,
previous_summary: str | None = None,
custom_instructions: str | None = None,
*,
workspace_name: str | None = None,
) -> HonchoLLMCallResponse[str]:
"""
Generate a comprehensive long summary via an LLM call.

Args:
formatted_messages: Pre-formatted conversation messages.
previous_summary: Previous summary text for continuity, if any.
custom_instructions: Optional custom instructions from configuration.
workspace_name: Workspace name for telemetry attribution.

Returns:
The LLM response containing the long summary text and token counts.
"""
# the token/word ratio is roughly 4:3 (about 0.75 words per token), so we multiply by 0.75.
# LLMs *seem* to respond better to being asked for a word count, but we should workshop this.
output_words = int(settings.SUMMARY.MAX_TOKENS_LONG * 0.75)
Expand All @@ -248,7 +289,10 @@ async def create_long_summary(
previous_summary_text = "There is no previous summary -- the messages are the beginning of the conversation."

prompt = long_summary_prompt(
formatted_messages, output_words, previous_summary_text
formatted_messages,
output_words,
previous_summary_text,
custom_instructions=custom_instructions,
)

return await honcho_llm_call(
Expand Down Expand Up @@ -439,6 +483,13 @@ async def _create_and_save_summary(
previous_summary_tokens = latest_summary["token_count"] if latest_summary else 0
input_tokens = messages_tokens + previous_summary_tokens

# Extract custom_instructions from the summarizer's own configuration.
# This is separate from reasoning custom_instructions — workspace
# operators may want summaries in a different style than deriver output.
custom_instructions: str | None = None
if configuration.summary and configuration.summary.custom_instructions is not None:
custom_instructions = configuration.summary.custom_instructions

(
new_summary,
is_fallback,
Expand All @@ -453,16 +504,21 @@ async def _create_and_save_summary(
last_message_id=last_message_id,
last_message_content_preview=last_message_content_preview,
message_count=message_count,
custom_instructions=custom_instructions,
workspace_name=workspace_name,
)

# Compute scaffold tokens up front (cheap + idempotent) so both the
# save-summary path and the telemetry emit below can use it
# without basedpyright tripping on a possibly-unbound name.
if summary_type == SummaryType.SHORT:
prompt_tokens = estimate_short_summary_prompt_tokens()
prompt_tokens = estimate_short_summary_prompt_tokens(
custom_instructions
)
else:
prompt_tokens = estimate_long_summary_prompt_tokens()
prompt_tokens = estimate_long_summary_prompt_tokens(
custom_instructions
)

# Step 3: Save to database with new transaction
if not is_fallback:
Expand Down Expand Up @@ -552,6 +608,7 @@ async def _create_summary(
last_message_id: int,
last_message_content_preview: str,
message_count: int,
custom_instructions: str | None = None,
*,
workspace_name: str | None = None,
) -> tuple[Summary, bool, int, int]:
Expand All @@ -567,6 +624,8 @@ async def _create_summary(
last_message_id: ID of the last message
last_message_content_preview: Preview of last message content for fallback
message_count: Number of messages for fallback
custom_instructions: Optional custom instructions from the summary configuration to include in the prompt
workspace_name: Optional workspace name for telemetry

Returns:
A tuple of (Summary, is_fallback, llm_input_tokens, llm_output_tokens)
Expand All @@ -585,12 +644,14 @@ async def _create_summary(
formatted_messages,
input_tokens,
previous_summary_text,
custom_instructions=custom_instructions,
workspace_name=workspace_name,
)
else:
response = await create_long_summary(
formatted_messages,
previous_summary_text,
custom_instructions=custom_instructions,
workspace_name=workspace_name,
)

Expand Down