1
- from typing import Literal
2
-
3
1
from fastapi import APIRouter , Depends , HTTPException
2
+ from mcp .server .fastmcp .prompts import base
3
+ from mcp .types import TextContent
4
4
5
5
from agent_memory_server import long_term_memory , messages
6
6
from agent_memory_server .config import settings
9
9
from agent_memory_server .logging import get_logger
10
10
from agent_memory_server .models import (
11
11
AckResponse ,
12
- CreateLongTermMemoryPayload ,
12
+ CreateLongTermMemoryRequest ,
13
13
GetSessionsQuery ,
14
14
LongTermMemoryResultsResponse ,
15
- SearchPayload ,
15
+ MemoryPromptRequest ,
16
+ MemoryPromptResponse ,
17
+ ModelNameLiteral ,
18
+ SearchRequest ,
16
19
SessionListResponse ,
17
20
SessionMemory ,
18
21
SessionMemoryResponse ,
22
+ SystemMessage ,
19
23
)
20
24
from agent_memory_server .utils .redis import get_redis_conn
21
25
22
26
23
27
logger = get_logger (__name__ )
24
28
25
- ModelNameLiteral = Literal [
26
- "gpt-3.5-turbo" ,
27
- "gpt-3.5-turbo-16k" ,
28
- "gpt-4" ,
29
- "gpt-4-32k" ,
30
- "gpt-4o" ,
31
- "gpt-4o-mini" ,
32
- "o1" ,
33
- "o1-mini" ,
34
- "o3-mini" ,
35
- "text-embedding-ada-002" ,
36
- "text-embedding-3-small" ,
37
- "text-embedding-3-large" ,
38
- "claude-3-opus-20240229" ,
39
- "claude-3-sonnet-20240229" ,
40
- "claude-3-haiku-20240307" ,
41
- "claude-3-5-sonnet-20240620" ,
42
- "claude-3-7-sonnet-20250219" ,
43
- "claude-3-5-sonnet-20241022" ,
44
- "claude-3-5-haiku-20241022" ,
45
- "claude-3-7-sonnet-latest" ,
46
- "claude-3-5-sonnet-latest" ,
47
- "claude-3-5-haiku-latest" ,
48
- "claude-3-opus-latest" ,
49
- ]
50
-
51
29
router = APIRouter ()
52
30
53
31
32
def _get_effective_window_size(
    window_size: int,
    context_window_max: int | None,
    model_name: ModelNameLiteral | None,
) -> int:
    """Clamp the requested window size to the applicable context limit.

    Precedence: an explicitly supplied ``context_window_max`` wins;
    otherwise the configured max tokens for ``model_name`` applies;
    with neither given, ``window_size`` is returned unchanged.
    """
    # Explicit caller-provided cap takes priority over model defaults.
    if context_window_max is not None:
        return min(window_size, context_window_max)
    # Fall back to the model's configured token limit when known.
    if model_name is not None:
        return min(window_size, get_model_config(model_name).max_tokens)
    # No limit information available: use the requested size as-is.
    return window_size
48
+
49
+
54
50
@router .get ("/sessions/" , response_model = SessionListResponse )
55
51
async def list_sessions (
56
52
options : GetSessionsQuery = Depends (),
@@ -103,17 +99,11 @@ async def get_session_memory(
103
99
Conversation history and context
104
100
"""
105
101
redis = await get_redis_conn ()
106
-
107
- # If context_window_max is explicitly provided, use that
108
- if context_window_max is not None :
109
- effective_window_size = min (window_size , context_window_max )
110
- # If model_name is provided, get its max_tokens from our config
111
- elif model_name is not None :
112
- model_config = get_model_config (model_name )
113
- effective_window_size = min (window_size , model_config .max_tokens )
114
- # Otherwise use the default window_size
115
- else :
116
- effective_window_size = window_size
102
+ effective_window_size = _get_effective_window_size (
103
+ window_size = window_size ,
104
+ context_window_max = context_window_max ,
105
+ model_name = model_name ,
106
+ )
117
107
118
108
session = await messages .get_session_memory (
119
109
redis = redis ,
@@ -181,7 +171,7 @@ async def delete_session_memory(
181
171
182
172
@router .post ("/long-term-memory" , response_model = AckResponse )
183
173
async def create_long_term_memory (
184
- payload : CreateLongTermMemoryPayload ,
174
+ payload : CreateLongTermMemoryRequest ,
185
175
background_tasks = Depends (get_background_tasks ),
186
176
):
187
177
"""
@@ -205,7 +195,7 @@ async def create_long_term_memory(
205
195
206
196
207
197
@router .post ("/long-term-memory/search" , response_model = LongTermMemoryResultsResponse )
208
- async def search_long_term_memory (payload : SearchPayload ):
198
+ async def search_long_term_memory (payload : SearchRequest ):
209
199
"""
210
200
Run a semantic search on long-term memory with filtering options.
211
201
@@ -215,11 +205,11 @@ async def search_long_term_memory(payload: SearchPayload):
215
205
Returns:
216
206
List of search results
217
207
"""
218
- redis = await get_redis_conn ()
219
-
220
208
if not settings .long_term_memory :
221
209
raise HTTPException (status_code = 400 , detail = "Long-term memory is disabled" )
222
210
211
+ redis = await get_redis_conn ()
212
+
223
213
# Extract filter objects from the payload
224
214
filters = payload .get_filters ()
225
215
@@ -236,3 +226,97 @@ async def search_long_term_memory(payload: SearchPayload):
236
226
237
227
# Pass text, redis, and filter objects to the search function
238
228
return await long_term_memory .search_long_term_memories (** kwargs )
229
+
230
+
231
@router.post("/memory-prompt", response_model=MemoryPromptResponse)
async def memory_prompt(params: MemoryPromptRequest) -> MemoryPromptResponse:
    """
    Hydrate a user query with memory context and return a prompt
    ready to send to an LLM.

    `query` is the input text that the caller of this API wants to use to find
    relevant context. If `session_id` is provided and matches an existing
    session, the resulting prompt will include those messages as the immediate
    history of messages leading to a message containing `query`.

    If `long_term_search` is provided, the resulting prompt will include
    relevant long-term memories found via semantic search with the options
    provided in the payload.

    Args:
        params: MemoryPromptRequest

    Returns:
        List of messages to send to an LLM, hydrated with relevant memory context

    Raises:
        HTTPException: 400 if neither `session` nor `long_term_search` is given
    """
    if not params.session and not params.long_term_search:
        raise HTTPException(
            status_code=400,
            detail="Either session or long_term_search must be provided",
        )

    _messages = []

    if params.session:
        # Only open a Redis connection when session memory is actually used;
        # the long-term search path manages its own connection internally.
        redis = await get_redis_conn()
        effective_window_size = _get_effective_window_size(
            window_size=params.session.window_size,
            context_window_max=params.session.context_window_max,
            model_name=params.session.model_name,
        )
        session_memory = await messages.get_session_memory(
            redis=redis,
            session_id=params.session.session_id,
            window_size=effective_window_size,
            namespace=params.session.namespace,
        )

        if session_memory:
            if session_memory.context:
                # TODO: Weird to use MCP types here?
                _messages.append(
                    SystemMessage(
                        content=TextContent(
                            type="text",
                            text=f"## A summary of the conversation so far\n{session_memory.context}",
                        ),
                    )
                )
            for msg in session_memory.messages:
                # Ignore past system messages as the latest context may have
                # changed. (Previously non-user roles, including "system",
                # were all re-emitted as assistant messages.)
                if msg.role == "system":
                    continue
                msg_class = (
                    base.UserMessage if msg.role == "user" else base.AssistantMessage
                )
                _messages.append(
                    msg_class(
                        content=TextContent(type="text", text=msg.content),
                    )
                )

    if params.long_term_search:
        # TODO: Exclude session messages if we already included them from session memory
        long_term_memories = await search_long_term_memory(
            params.long_term_search,
        )

        if long_term_memories.total > 0:
            long_term_memories_text = "\n".join(
                f"- {m.text}" for m in long_term_memories.memories
            )
            _messages.append(
                SystemMessage(
                    content=TextContent(
                        type="text",
                        text=f"## Long term memories related to the user's query\n{long_term_memories_text}",
                    ),
                )
            )

    # The caller's query always comes last, as the user turn the LLM answers.
    _messages.append(
        base.UserMessage(
            content=TextContent(type="text", text=params.query),
        )
    )

    return MemoryPromptResponse(messages=_messages)
0 commit comments