Skip to content

Commit d49e820

Browse files
authored
feat: add max_tokens and structured output to /reflect (#74)
* feat: add structured output to /reflect * feat: add structured output to /reflect * improve * add max_tokens * fix rust client * fix rust client * fix rust client * try fix * try fix * no stricts
1 parent c8c7603 commit d49e820

25 files changed

Lines changed: 695 additions & 82 deletions

File tree

hindsight-api/hindsight_api/api/http.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,17 +385,31 @@ class ReflectRequest(BaseModel):
385385
"query": "What do you think about artificial intelligence?",
386386
"budget": "low",
387387
"context": "This is for a research paper on AI ethics",
388+
"max_tokens": 4096,
388389
"include": {"facts": {}},
390+
"response_schema": {
391+
"type": "object",
392+
"properties": {
393+
"summary": {"type": "string"},
394+
"key_points": {"type": "array", "items": {"type": "string"}},
395+
},
396+
"required": ["summary", "key_points"],
397+
},
389398
}
390399
}
391400
)
392401

393402
query: str
394403
budget: Budget = Budget.LOW
395404
context: str | None = None
405+
max_tokens: int = Field(default=4096, description="Maximum tokens for the response")
396406
include: ReflectIncludeOptions = Field(
397407
default_factory=ReflectIncludeOptions, description="Options for including additional data (disabled by default)"
398408
)
409+
response_schema: dict | None = Field(
410+
default=None,
411+
description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
412+
)
399413

400414

401415
class OpinionItem(BaseModel):
@@ -440,12 +454,20 @@ class ReflectResponse(BaseModel):
440454
{"id": "123", "text": "AI is used in healthcare", "type": "world"},
441455
{"id": "456", "text": "I discussed AI applications last week", "type": "experience"},
442456
],
457+
"structured_output": {
458+
"summary": "AI is transformative",
459+
"key_points": ["Used in healthcare", "Discussed recently"],
460+
},
443461
}
444462
}
445463
)
446464

447465
text: str
448466
based_on: list[ReflectFact] = [] # Facts used to generate the response
467+
structured_output: dict | None = Field(
468+
default=None,
469+
description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
470+
)
449471

450472

451473
class BanksResponse(BaseModel):
@@ -1211,6 +1233,8 @@ async def api_reflect(
12111233
query=request.query,
12121234
budget=request.budget,
12131235
context=request.context,
1236+
max_tokens=request.max_tokens,
1237+
response_schema=request.response_schema,
12141238
request_context=request_context,
12151239
)
12161240

@@ -1233,6 +1257,7 @@ async def api_reflect(
12331257
return ReflectResponse(
12341258
text=core_result.text,
12351259
based_on=based_on_facts,
1260+
structured_output=core_result.structured_output,
12361261
)
12371262

12381263
except Exception as e:

hindsight-api/hindsight_api/engine/interface.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ async def reflect_async(
110110
*,
111111
budget: "Budget | None" = None,
112112
context: str | None = None,
113+
max_tokens: int = 4096,
114+
response_schema: dict | None = None,
113115
request_context: "RequestContext",
114116
) -> "ReflectResult":
115117
"""
@@ -120,6 +122,8 @@ async def reflect_async(
120122
query: The question to reflect on.
121123
budget: Search budget for retrieving context.
122124
context: Additional context for the reflection.
125+
max_tokens: Maximum tokens for the response.
126+
response_schema: Optional JSON Schema for structured output.
123127
request_context: Request context for authentication.
124128
125129
Returns:

hindsight-api/hindsight_api/engine/llm_wrapper.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ async def call(
171171
initial_backoff: float = 1.0,
172172
max_backoff: float = 60.0,
173173
skip_validation: bool = False,
174+
strict_schema: bool = False,
174175
) -> Any:
175176
"""
176177
Make an LLM API call with retry logic.
@@ -185,6 +186,7 @@ async def call(
185186
initial_backoff: Initial backoff time in seconds.
186187
max_backoff: Maximum backoff time in seconds.
187188
skip_validation: Return raw JSON without Pydantic validation.
189+
strict_schema: Use strict JSON schema enforcement (OpenAI only). Guarantees all required fields.
188190
189191
Returns:
190192
Parsed response if response_format is provided, otherwise text content.
@@ -268,22 +270,34 @@ async def call(
268270
for attempt in range(max_retries + 1):
269271
try:
270272
if response_format is not None:
271-
# Add schema to system message for JSON mode
273+
schema = None
272274
if hasattr(response_format, "model_json_schema"):
273275
schema = response_format.model_json_schema()
274-
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
275276

276-
if call_params["messages"] and call_params["messages"][0].get("role") == "system":
277-
call_params["messages"][0]["content"] += schema_msg
278-
elif call_params["messages"]:
279-
call_params["messages"][0]["content"] = (
280-
schema_msg + "\n\n" + call_params["messages"][0]["content"]
281-
)
282-
283-
# LM Studio and Ollama don't support json_object response format reliably
284-
# We rely on the schema in the system message instead
285-
if self.provider not in ("lmstudio", "ollama"):
286-
call_params["response_format"] = {"type": "json_object"}
277+
if strict_schema and schema is not None:
278+
# Use OpenAI's strict JSON schema enforcement
279+
# This guarantees all required fields are returned
280+
call_params["response_format"] = {
281+
"type": "json_schema",
282+
"json_schema": {
283+
"name": "response",
284+
"strict": True,
285+
"schema": schema,
286+
},
287+
}
288+
else:
289+
# Soft enforcement: add schema to prompt and use json_object mode
290+
if schema is not None:
291+
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
292+
293+
if call_params["messages"] and call_params["messages"][0].get("role") == "system":
294+
call_params["messages"][0]["content"] += schema_msg
295+
elif call_params["messages"]:
296+
call_params["messages"][0]["content"] = (
297+
schema_msg + "\n\n" + call_params["messages"][0]["content"]
298+
)
299+
if self.provider not in ("lmstudio", "ollama"):
300+
call_params["response_format"] = {"type": "json_object"}
287301

288302
logger.debug(f"Sending request to {self.provider}/{self.model} (timeout={self.timeout})")
289303
response = await self._client.chat.completions.create(**call_params)

hindsight-api/hindsight_api/engine/memory_engine.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3078,6 +3078,8 @@ async def reflect_async(
30783078
*,
30793079
budget: Budget | None = None,
30803080
context: str | None = None,
3081+
max_tokens: int = 4096,
3082+
response_schema: dict | None = None,
30813083
request_context: "RequestContext",
30823084
) -> ReflectResult:
30833085
"""
@@ -3089,19 +3091,22 @@ async def reflect_async(
30893091
3. Retrieves existing opinions (bank's formed perspectives)
30903092
4. Uses LLM to formulate an answer
30913093
5. Extracts and stores any new opinions formed during reflection
3092-
6. Returns plain text answer and the facts used
3094+
6. Optionally generates structured output based on response_schema
3095+
7. Returns plain text answer and the facts used
30933096
30943097
Args:
30953098
bank_id: bank identifier
30963099
query: Question to answer
30973100
budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
30983101
context: Additional context string to include in LLM prompt (not used in recall)
3102+
response_schema: Optional JSON Schema for structured output
30993103
31003104
Returns:
31013105
ReflectResult containing:
31023106
- text: Plain text answer (no markdown)
31033107
- based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects)
31043108
- new_opinions: List of newly formed opinions
3109+
- structured_output: Optional dict if response_schema was provided
31053110
"""
31063111
# Use cached LLM config
31073112
if self._llm_config is None:
@@ -3179,17 +3184,42 @@ async def reflect_async(
31793184
log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")
31803185

31813186
system_message = think_utils.get_system_message(disposition)
3187+
messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
3188+
3189+
# Prepare response_format if schema provided
3190+
response_format = None
3191+
if response_schema is not None:
3192+
# Wrapper class to provide Pydantic-like interface for raw JSON schemas
3193+
class JsonSchemaWrapper:
3194+
def __init__(self, schema: dict):
3195+
self._schema = schema
3196+
3197+
def model_json_schema(self):
3198+
return self._schema
3199+
3200+
response_format = JsonSchemaWrapper(response_schema)
31823201

31833202
llm_start = time.time()
3184-
answer_text = await self._llm_config.call(
3185-
messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}],
3186-
scope="memory_think",
3187-
temperature=0.9,
3188-
max_completion_tokens=1000,
3203+
result = await self._llm_config.call(
3204+
messages=messages,
3205+
scope="memory_reflect",
3206+
max_completion_tokens=max_tokens,
3207+
response_format=response_format,
3208+
skip_validation=True if response_format else False,
3209+
# Don't enforce strict_schema - not all providers support it and may retry forever
3210+
# Soft enforcement (schema in prompt + json_object mode) is sufficient
3211+
strict_schema=False,
31893212
)
31903213
llm_time = time.time() - llm_start
31913214

3192-
answer_text = answer_text.strip()
3215+
# Handle response based on whether structured output was requested
3216+
if response_schema is not None:
3217+
structured_output = result
3218+
answer_text = "" # Empty for backward compatibility
3219+
log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated")
3220+
else:
3221+
structured_output = None
3222+
answer_text = result.strip()
31933223

31943224
# Submit form_opinion task for background processing
31953225
await self._task_backend.submit_task(
@@ -3207,6 +3237,7 @@ async def reflect_async(
32073237
text=answer_text,
32083238
based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results},
32093239
new_opinions=[], # Opinions are being extracted asynchronously
3240+
structured_output=structured_output,
32103241
)
32113242

32123243
# Call post-operation hook if validator is configured

hindsight-api/hindsight_api/engine/response_models.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ class ReflectResult(BaseModel):
123123
Result from a reflect operation.
124124
125125
Contains the formulated answer, the facts it was based on (organized by type),
126-
and any new opinions that were formed during the reflection process.
126+
any new opinions that were formed during the reflection process, and optionally
127+
structured output if a response schema was provided.
127128
"""
128129

129130
model_config = ConfigDict(
@@ -145,6 +146,7 @@ class ReflectResult(BaseModel):
145146
"opinion": [],
146147
},
147148
"new_opinions": ["Machine learning has great potential in healthcare"],
149+
"structured_output": {"summary": "ML in healthcare", "confidence": 0.9},
148150
}
149151
}
150152
)
@@ -154,6 +156,10 @@ class ReflectResult(BaseModel):
154156
description="Facts used to formulate the answer, organized by type (world, experience, opinion)"
155157
)
156158
new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection")
159+
structured_output: dict[str, Any] | None = Field(
160+
default=None,
161+
description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.",
162+
)
157163

158164

159165
class Opinion(BaseModel):

0 commit comments

Comments (0)