Skip to content

Commit ea0249b

Browse files
committed
Prompt changes to provide better search results; increase max tokens to allow the model to provide longer responses.
1 parent 7957bf5 commit ea0249b

File tree

2 files changed

+180
-54
lines changed

2 files changed

+180
-54
lines changed

api/memoryalpha/rag.py

Lines changed: 179 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def get_system_prompt(thinking_mode: ThinkingMode) -> str:
4242
- If the records don't contain relevant information, say "I don't have information about that in my records"
4343
- DO NOT make up information, invent characters, or hallucinate details
4444
- DO NOT use external knowledge about Star Trek - only use the provided records
45+
- AVOID mirror universe references unless specifically asked about it
4546
- If asked about something not in the records, be honest about the limitation
4647
- Stay in character as an LCARS computer system at all times
4748
@@ -240,73 +241,198 @@ def ask(self, query: str, max_tokens: int = 2048, top_k: int = 10, top_p: float
240241

241242
logger.info(f"Starting tool-enabled RAG for query: {query}")
242243

243-
# Always do an initial search
244-
logger.info("Performing initial search for query")
245-
docs = self.search(query, top_k=top_k)
246-
logger.info(f"Initial search returned {len(docs)} documents")
247-
248-
if not docs:
249-
logger.warning("No documents found in initial search")
250-
return "I don't have information about that in the Memory Alpha database."
251-
252-
# Format search results for the LLM
253-
context_parts = []
254-
for i, doc in enumerate(docs, 1):
255-
content = doc['content']
256-
if len(content) > 1000: # Limit content for LLM
257-
content = content[:1000] + "..."
258-
context_parts.append(f"DOCUMENT {i}: {doc['title']}\n{content}")
259-
260-
context_text = "\n\n".join(context_parts)
261-
244+
# Define the search tool
245+
search_tool_definition = {
246+
"type": "function",
247+
"function": {
248+
"name": "search_memory_alpha",
249+
"description": "Search the Star Trek Memory Alpha database for information. Use this tool when you need to find specific information about Star Trek characters, episodes, ships, planets, or other topics.",
250+
"parameters": {
251+
"type": "object",
252+
"properties": {
253+
"query": {
254+
"type": "string",
255+
"description": "The search query to find relevant information"
256+
},
257+
"top_k": {
258+
"type": "integer",
259+
"description": "Number of documents to retrieve (default: 5, max: 10)",
260+
"default": 5,
261+
"maximum": 10
262+
}
263+
},
264+
"required": ["query"]
265+
}
266+
}
267+
}
268+
262269
system_prompt = """You are an LCARS computer system with access to Star Trek Memory Alpha records.
263270
264-
CRITICAL INSTRUCTIONS:
265-
- You MUST answer ONLY using the provided search results below
271+
You have access to a search tool that can query the Memory Alpha database. You MUST use this tool for ALL questions about Star Trek.
272+
273+
CRITICAL REQUIREMENTS:
274+
- You MUST call the search tool for EVERY question
275+
- You cannot answer any question without first using the search tool
266276
- Do NOT use any external knowledge or make up information
267-
- If the search results don't contain the information, say so clearly
268-
- Stay in character as an LCARS computer system
269-
- Be concise but informative"""
277+
- Only answer based on the search results provided
278+
- If no relevant information is found, say so clearly
279+
- ALWAYS provide a final answer after using tools - do not just think without concluding
280+
281+
TOOL USAGE:
282+
- Always call the search tool first, before attempting to answer
283+
- Do NOT directly use the input question, only use keywords from it
284+
- Use only key terms from the input question for searching
285+
- If insufficient information is found on the first try, retry with variations or relevant info from previous queries
286+
- DISCARD details from alternate universes or timelines
287+
- DISREGARD details about books, comics, or non-canon sources
288+
- NEVER mention appearances or actors, only in-universe details
289+
- Ensure a complete answer can be formulated before stopping searches
290+
- Wait for search results before providing your final answer
291+
292+
RESPONSE FORMAT:
293+
- Use tools when needed
294+
- Provide your final answer clearly and concisely
295+
- Do not add details that are irrelevant to the question
296+
- Stay in-character as an LCARS computer system at all times; do not allude to the Star Trek universe itself or to its being a fictional setting
297+
- Do not mention the search results, only the final in-universe answer
298+
- Do not end responses with thinking content"""
270299

271300
messages = [
272301
{"role": "system", "content": system_prompt},
273-
{"role": "user", "content": f"SEARCH RESULTS:\n{context_text}\n\nQUESTION: {query}\n\nAnswer using ONLY the information in the search results above."}
302+
{"role": "user", "content": f"Please answer this question about Star Trek: {query}"}
274303
]
275304

276-
try:
277-
result = self.ollama_client.chat(
278-
model=model,
279-
messages=messages,
280-
stream=False,
281-
options={"temperature": temperature, "top_p": top_p, "num_predict": max_tokens}
282-
)
283-
284-
final_response = result['message']['content']
285-
logger.info(f"LLM response length: {len(final_response)}")
305+
max_iterations = 5 # Prevent infinite loops
306+
iteration = 0
307+
has_used_tool = False
308+
309+
while iteration < max_iterations:
310+
iteration += 1
311+
logger.info(f"Iteration {iteration} for query: {query}")
286312

287-
# Handle thinking mode response processing
288-
if self.thinking_mode == ThinkingMode.DISABLED:
313+
try:
314+
logger.info(f"Sending messages to LLM: {[msg['role'] for msg in messages]}")
315+
result = self.ollama_client.chat(
316+
model=model,
317+
messages=messages,
318+
stream=False,
319+
options={"temperature": temperature, "top_p": top_p, "num_predict": max_tokens},
320+
tools=[search_tool_definition]
321+
)
322+
323+
response_message = result['message']
324+
logger.info(f"LLM response type: {type(response_message)}")
325+
logger.debug(f"Response message attributes: {dir(response_message)}")
326+
logger.debug(f"Response message content: {response_message.get('content', 'No content')[:200]}...")
327+
328+
# Check if the model wants to use a tool
329+
tool_calls = getattr(response_message, 'tool_calls', None) or response_message.get('tool_calls')
330+
if tool_calls:
331+
has_used_tool = True
332+
logger.info(f"Tool calls detected: {len(tool_calls)}")
333+
# Execute the tool call
334+
tool_call = tool_calls[0]
335+
logger.info(f"Tool call: {tool_call.get('function', {}).get('name', 'Unknown')}")
336+
337+
if tool_call.get('function', {}).get('name') == 'search_memory_alpha':
338+
args = tool_call.get('function', {}).get('arguments', {})
339+
search_query = args.get('query', '')
340+
search_top_k = min(args.get('top_k', 5), 10)
341+
342+
logger.info(f"Executing search for: '{search_query}' with top_k={search_top_k}")
343+
344+
# Execute the search
345+
search_result = self.search_tool(search_query, search_top_k)
346+
logger.info(f"Search result length: {len(search_result)}")
347+
logger.debug(f"Search result preview: {search_result[:500]}...")
348+
349+
# Add the tool call and result to messages
350+
messages.append(response_message)
351+
messages.append({
352+
"role": "tool",
353+
"content": search_result,
354+
"tool_call_id": tool_call.get('id', '')
355+
})
356+
357+
logger.info("Continuing conversation with tool results")
358+
continue # Continue the conversation with tool results
359+
360+
# If no tool call and we haven't used tools yet, force a search
361+
if not has_used_tool and iteration == 1:
362+
logger.info("LLM didn't use tool on first attempt, forcing initial search")
363+
search_result = self.search_tool(query, 5)
364+
messages.append({
365+
"role": "tool",
366+
"content": search_result,
367+
"tool_call_id": "forced_search"
368+
})
369+
has_used_tool = True
370+
continue
371+
372+
# If no tool call, this is the final answer
373+
final_response = response_message.get('content', '')
374+
if not final_response:
375+
logger.warning("LLM returned empty content")
376+
final_response = "I apologize, but I was unable to generate a response."
377+
378+
logger.info(f"Final response length: {len(final_response)}")
379+
logger.info(f"Final response preview: {final_response[:200]}...")
380+
logger.debug(f"Raw final response: {repr(final_response[:500])}")
381+
382+
# Always clean the response first to remove thinking tags and unwanted content
289383
final_response = self._clean_response(final_response)
290-
elif self.thinking_mode == ThinkingMode.QUIET:
291-
final_response = self._replace_thinking_tags(final_response)
292-
else: # VERBOSE
293-
final_response = final_response.strip()
384+
logger.debug(f"After cleaning: {repr(final_response[:500])}")
385+
386+
# If cleaning removed everything, the LLM was just thinking without answering
387+
if not final_response.strip():
388+
logger.warning("LLM response was only thinking content, no final answer provided")
389+
final_response = "I apologize, but I was unable to find sufficient information to answer your question based on the available Memory Alpha records."
390+
391+
logger.info(f"Thinking mode: {self.thinking_mode}")
392+
logger.info(f"Final cleaned response: {final_response[:200]}...")
393+
394+
# Handle thinking mode response processing
395+
if self.thinking_mode == ThinkingMode.QUIET:
396+
final_response = self._replace_thinking_tags(final_response)
397+
# For DISABLED and VERBOSE modes, the response is already clean
398+
399+
self._update_history(query, final_response)
400+
logger.info("Returning final answer")
401+
return final_response
402+
403+
except Exception as e:
404+
logger.error(f"Chat failed: {e}")
405+
return f"Error processing query: {str(e)}"
294406

295-
self._update_history(query, final_response)
296-
return final_response
297-
298-
except Exception as e:
299-
logger.error(f"Chat failed: {e}")
300-
return f"Error processing query: {str(e)}"
407+
# Fallback if max iterations reached
408+
logger.warning(f"Max iterations reached for query: {query}")
409+
return "Query processing exceeded maximum iterations. Please try a simpler question."
301410

302411
def _clean_response(self, answer: str) -> str:
303412
"""Clean response by removing ANSI codes and thinking tags."""
304-
clean = re.sub(r"\033\[[0-9;]*m", "", answer).replace("LCARS: ", "").strip()
305-
while "<think>" in clean and "</think>" in clean:
306-
start = clean.find("<think>")
307-
end = clean.find("</think>") + len("</think>")
308-
clean = clean[:start] + clean[end:]
309-
return clean.strip()
413+
if not answer:
414+
return ""
415+
416+
# Remove ANSI codes
417+
clean = re.sub(r"\033\[[0-9;]*m", "", answer)
418+
# Remove LCARS prefix
419+
clean = clean.replace("LCARS: ", "").strip()
420+
421+
# Remove thinking tags and their content - multiple patterns
422+
# Pattern 1: Complete <think>...</think> blocks
423+
clean = re.sub(r'<think>.*?</think>', '', clean, flags=re.DOTALL | re.IGNORECASE)
424+
# Pattern 2: Unclosed <think> tags
425+
clean = re.sub(r'<think>.*?(?=<think>|</think>|$)', '', clean, flags=re.DOTALL | re.IGNORECASE)
426+
# Pattern 3: Any remaining think tags
427+
clean = re.sub(r'</?think>', '', clean, flags=re.IGNORECASE)
428+
# Pattern 4: Alternative thinking formats
429+
clean = re.sub(r'<thinking>.*?</thinking>', '', clean, flags=re.DOTALL | re.IGNORECASE)
430+
431+
# Remove extra whitespace and newlines
432+
clean = re.sub(r'\n\s*\n', '\n', clean)
433+
clean = clean.strip()
434+
435+
return clean
310436

311437
def _replace_thinking_tags(self, answer: str) -> str:
312438
"""Replace thinking tags with processing text."""

chat.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Interactive chat script for MemoryAlpha RAG API
44
BASE_URL="http://localhost:8000"
55
THINKING_MODE="DISABLED"
6-
MAX_TOKENS=512
6+
MAX_TOKENS=2048
77
TOP_K=5
88
TOP_P=0.8
99
TEMPERATURE=0.3

0 commit comments

Comments
 (0)