@@ -6,9 +6,10 @@
 from openai_messages_token_helper import build_messages, get_token_limit
 
 from fastapi_app.api_models import Message, RAGContext, RetrievalResponse, ThoughtStep
+from fastapi_app.postgres_models import Item
 from fastapi_app.postgres_searcher import PostgresSearcher
 from fastapi_app.query_rewriter import build_search_function, extract_search_arguments
-from fastapi_app.rag_simple import RAGChatBase
+from fastapi_app.rag_simple import ChatParams, RAGChatBase
 
 
 class AdvancedRAGChat(RAGChatBase):
@@ -26,15 +27,10 @@ def __init__(
         self.chat_deployment = chat_deployment
         self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True)
 
-    async def run(
-        self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: dict[str, Any] = {},
-    ) -> RetrievalResponse:
-        chat_params = self.get_params(messages, overrides)
-
-        # Generate an optimized keyword search query based on the chat history and the last question
-        query_response_token_limit = 500
+    async def generate_search_query(
+        self, chat_params: ChatParams, query_response_token_limit: int
+    ) -> tuple[list[ChatCompletionMessageParam], Any | str | None, list]:
+        """Generate an optimized keyword search query based on the chat history and the last question"""
         query_messages: list[ChatCompletionMessageParam] = build_messages(
             model=self.chat_model,
             system_prompt=self.query_prompt_template,
@@ -57,6 +53,12 @@ async def run(
 
         query_text, filters = extract_search_arguments(chat_params.original_user_query, chat_completion)
 
+        return query_messages, query_text, filters
+
+    async def retreive_and_build_context(
+        self, chat_params: ChatParams, query_text: str | Any | None, filters: list
+    ) -> tuple[list[ChatCompletionMessageParam], list[Item]]:
+        """Retrieve relevant items from the database and build a context for the chat model."""
         # Retrieve relevant items from the database with the GPT optimized query
         results = await self.searcher.search_and_embed(
             query_text,
@@ -70,22 +72,40 @@ async def run(
         content = "\n".join(sources_content)
 
         # Generate a contextual and content specific answer using the search results and chat history
-        response_token_limit = 1024
         contextual_messages: list[ChatCompletionMessageParam] = build_messages(
             model=self.chat_model,
-            system_prompt=overrides.get("prompt_template") or self.answer_prompt_template,
+            system_prompt=chat_params.prompt_template,
             new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content,
             past_messages=chat_params.past_messages,
-            max_tokens=self.chat_token_limit - response_token_limit,
+            max_tokens=self.chat_token_limit - chat_params.response_token_limit,
             fallback_to_default=True,
         )
+        return contextual_messages, results
+
+    async def run(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        overrides: dict[str, Any] = {},
+    ) -> RetrievalResponse:
+        chat_params = self.get_params(messages, overrides)
+
+        # Generate an optimized keyword search query based on the chat history and the last question
+        query_messages, query_text, filters = await self.generate_search_query(
+            chat_params=chat_params, query_response_token_limit=500
+        )
+
+        # Retrieve relevant items from the database with the GPT optimized query
+        # Generate a contextual and content specific answer using the search results and chat history
+        contextual_messages, results = await self.retreive_and_build_context(
+            chat_params=chat_params, query_text=query_text, filters=filters
+        )
 
         chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create(
             # Azure OpenAI takes the deployment name as the model name
             model=self.chat_deployment if self.chat_deployment else self.chat_model,
             messages=contextual_messages,
-            temperature=overrides.get("temperature", 0.3),
-            max_tokens=response_token_limit,
+            temperature=chat_params.temperature,
+            max_tokens=chat_params.response_token_limit,
             n=1,
             stream=False,
         )
@@ -141,50 +161,14 @@ async def run_stream(
         chat_params = self.get_params(messages, overrides)
 
         # Generate an optimized keyword search query based on the chat history and the last question
-        query_response_token_limit = 500
-        query_messages: list[ChatCompletionMessageParam] = build_messages(
-            model=self.chat_model,
-            system_prompt=self.query_prompt_template,
-            new_user_content=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            max_tokens=self.chat_token_limit - query_response_token_limit,  # TODO: count functions
-            fallback_to_default=True,
+        query_messages, query_text, filters = await self.generate_search_query(
+            chat_params=chat_params, query_response_token_limit=500
         )
 
-        chat_completion: ChatCompletion = await self.openai_chat_client.chat.completions.create(
-            messages=query_messages,
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chat_deployment if self.chat_deployment else self.chat_model,
-            temperature=0.0,  # Minimize creativity for search query generation
-            max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, too high risks performance
-            n=1,
-            tools=build_search_function(),
-            tool_choice="auto",
-        )
-
-        query_text, filters = extract_search_arguments(chat_params.original_user_query, chat_completion)
-
         # Retrieve relevant items from the database with the GPT optimized query
-        results = await self.searcher.search_and_embed(
-            query_text,
-            top=chat_params.top,
-            enable_vector_search=chat_params.enable_vector_search,
-            enable_text_search=chat_params.enable_text_search,
-            filters=filters,
-        )
-
-        sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results]
-        content = "\n".join(sources_content)
-
         # Generate a contextual and content specific answer using the search results and chat history
-        response_token_limit = 1024
-        contextual_messages: list[ChatCompletionMessageParam] = build_messages(
-            model=self.chat_model,
-            system_prompt=overrides.get("prompt_template") or self.answer_prompt_template,
-            new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content,
-            past_messages=chat_params.past_messages,
-            max_tokens=self.chat_token_limit - response_token_limit,
-            fallback_to_default=True,
+        contextual_messages, results = await self.retreive_and_build_context(
+            chat_params=chat_params, query_text=query_text, filters=filters
         )
 
         chat_completion_async_stream: AsyncStream[
@@ -193,8 +177,8 @@ async def run_stream(
             # Azure OpenAI takes the deployment name as the model name
             model=self.chat_deployment if self.chat_deployment else self.chat_model,
             messages=contextual_messages,
-            temperature=overrides.get("temperature", 0.3),
-            max_tokens=response_token_limit,
+            temperature=chat_params.temperature,
+            max_tokens=chat_params.response_token_limit,
             n=1,
             stream=True,
         )
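
For context on the `chat_params.*` reads introduced above: `ChatParams` is imported from `fastapi_app.rag_simple`, and this diff shows it carrying `prompt_template`, `temperature`, `response_token_limit`, `past_messages`, `top`, and the vector/text search flags. A minimal sketch of what such a container could look like, inferred only from the attribute accesses in this diff and the defaults the old inline code used (`0.3`, `1024`); the real definition lives in `rag_simple.py` and may differ:

```python
# Hypothetical reconstruction of ChatParams, inferred from this diff alone.
# The actual class is defined in fastapi_app/rag_simple.py and may differ.
from dataclasses import dataclass, field
from typing import Any


@dataclass
class ChatParams:
    original_user_query: str
    past_messages: list[dict[str, Any]] = field(default_factory=list)
    prompt_template: str | None = None  # replaces overrides.get("prompt_template") or the default template
    temperature: float = 0.3            # replaces overrides.get("temperature", 0.3)
    response_token_limit: int = 1024    # replaces the local response_token_limit = 1024
    top: int = 3                        # assumed default; consumed by search_and_embed
    enable_vector_search: bool = True   # assumed default
    enable_text_search: bool = True     # assumed default
```

Centralizing override parsing in `get_params` means both entry points see identical defaults, which is what lets the `temperature`/`max_tokens` call sites above collapse into plain attribute reads.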
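The shape of the refactor itself, reduced to a runnable toy: both the blocking and streaming entry points await the same two helper coroutines, so the query-rewrite and retrieval logic exists exactly once. All names below are illustrative stand-ins, not the repository's actual implementation:

```python
import asyncio
from collections.abc import AsyncIterator


class ChatFlow:
    """Toy stand-in for AdvancedRAGChat: two shared helpers, two entry points."""

    async def generate_search_query(self, question: str) -> str:
        # Stand-in for the tool-calling query rewrite step.
        return question.lower()

    async def retrieve_and_build_context(self, query: str) -> list[str]:
        # Stand-in for search_and_embed plus prompt assembly.
        return [f"[1]: source matching '{query}'"]

    async def run(self, question: str) -> str:
        query = await self.generate_search_query(question)
        sources = await self.retrieve_and_build_context(query)
        return " ".join(sources)  # non-streaming answer

    async def run_stream(self, question: str) -> AsyncIterator[str]:
        query = await self.generate_search_query(question)
        sources = await self.retrieve_and_build_context(query)
        for chunk in sources:  # streaming answer
            yield chunk


async def main() -> None:
    flow = ChatFlow()
    print(await flow.run("Find waterproof tents"))
    async for chunk in flow.run_stream("Find waterproof tents"):
        print(chunk)


asyncio.run(main())
```

The payoff shows in the `run_stream` hunk above: roughly fifty duplicated lines reduce to fourteen, and any future change to query rewriting or retrieval lands in one place for both paths.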