Azure-Samples
diff --git a/.vscode/settings.json b/.vscode/settings.json
Lines changed: 2 additions & 1 deletion
diff --git a/requirements-dev.txt b/requirements-dev.txt
Lines changed: 1 addition & 0 deletions
diff --git a/src/backend/fastapi_app/__init__.py b/src/backend/fastapi_app/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py
Lines changed: 33 additions & 3 deletions
diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py
Lines changed: 8 additions & 4 deletions
diff --git a/src/backend/fastapi_app/postgres_searcher.py b/src/backend/fastapi_app/postgres_searcher.py
Lines changed: 10 additions & 6 deletions
diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py
Lines changed: 17 additions & 42 deletions
diff --git a/src/backend/fastapi_app/rag_base.py b/src/backend/fastapi_app/rag_base.py
Lines changed: 18 additions & 11 deletions
diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml
Lines changed: 1 addition & 1 deletion
@@ -36,5 +36,6 @@
         "htmlcov": true,
         ".mypy_cache": true,
         ".coverage": true
-    }
+    },
+    "python.REPL.enableREPLSmartSend": false
 }
@@ -14,3 +14,4 @@ pytest-snapshot
 locust
 psycopg2
 dotenv-azd
+freezegun
@@ -38,7 +38,7 @@ async def lifespan(app: fastapi.FastAPI) -> AsyncIterator[State]:
     if (
         os.getenv("OPENAI_CHAT_HOST") == "azure"
         or os.getenv("OPENAI_EMBED_HOST") == "azure"
-        or os.getenv("POSTGRES_HOST").endswith(".database.azure.com")
+        or os.getenv("POSTGRES_HOST", "").endswith(".database.azure.com")
     ):
         azure_credential = await get_azure_credential()
     engine = await create_postgres_engine_from_env(azure_credential)
 
@@ -1,8 +1,9 @@
 from enum import Enum
-from typing import Any, Optional
+from typing import Any, Optional, Union
 
 from openai.types.chat import ChatCompletionMessageParam
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from pydantic_ai.messages import ModelRequest, ModelResponse
 
 
 class AIChatRoles(str, Enum):
@@ -95,4 +96,33 @@ class ChatParams(ChatRequestOverrides):
     enable_text_search: bool
     enable_vector_search: bool
     original_user_query: str
-    past_messages: list[ChatCompletionMessageParam]
+    past_messages: list[Union[ModelRequest, ModelResponse]]
+
+
+class Filter(BaseModel):
+    column: str
+    comparison_operator: str
+    value: Any
+
+
+class PriceFilter(Filter):
+    column: str = Field(default="price", description="The column to filter on (always 'price' for this filter)")
+    comparison_operator: str = Field(description="The operator for price comparison ('>', '<', '>=', '<=', '=')")
+    value: float = Field(description="The price value to compare against (e.g., 30.00)")
+
+
+class BrandFilter(Filter):
+    column: str = Field(default="brand", description="The column to filter on (always 'brand' for this filter)")
+    comparison_operator: str = Field(description="The operator for brand comparison ('=' or '!=')")
+    value: str = Field(description="The brand name to compare against (e.g., 'AirStrider')")
+
+
+class SearchResults(BaseModel):
+    query: str
+    """The original search query"""
+
+    items: list[ItemPublic]
+    """List of items that match the search query and filters"""
+
+    filters: list[Filter]
+    """List of filters applied to the search results"""
@@ -9,7 +9,7 @@
 
 
 async def create_openai_chat_client(
-    azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential],
+    azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
 ) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
     openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
     OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST")
@@ -29,7 +29,7 @@ async def create_openai_chat_client(
                 azure_deployment=azure_deployment,
                 api_key=api_key,
             )
-        else:
+        elif azure_credential:
             logger.info(
                 "Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s",
                 azure_endpoint,
@@ -44,6 +44,8 @@ async def create_openai_chat_client(
                 azure_deployment=azure_deployment,
                 azure_ad_token_provider=token_provider,
             )
+        else:
+            raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
     elif OPENAI_CHAT_HOST == "ollama":
         logger.info("Setting up OpenAI client for chat completions using Ollama")
         openai_chat_client = openai.AsyncOpenAI(
@@ -67,7 +69,7 @@ async def create_openai_chat_client(
 
 
 async def create_openai_embed_client(
-    azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential],
+    azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None],
 ) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]:
     openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]
     OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST")
@@ -87,7 +89,7 @@ async def create_openai_embed_client(
                 azure_deployment=azure_deployment,
                 api_key=api_key,
             )
-        else:
+        elif azure_credential:
             logger.info(
                 "Setting up Azure OpenAI client for embeddings using Azure Identity, endpoint %s, deployment %s",
                 azure_endpoint,
@@ -102,6 +104,8 @@ async def create_openai_embed_client(
                 azure_deployment=azure_deployment,
                 azure_ad_token_provider=token_provider,
             )
+        else:
+            raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.")
     elif OPENAI_EMBED_HOST == "ollama":
         logger.info("Setting up OpenAI client for embeddings using Ollama")
         openai_embed_client = openai.AsyncOpenAI(
 
@@ -5,6 +5,7 @@
 from sqlalchemy import Float, Integer, column, select, text
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from fastapi_app.api_models import Filter
 from fastapi_app.embeddings import compute_text_embedding
 from fastapi_app.postgres_models import Item
 
@@ -26,21 +27,24 @@ def __init__(
         self.embed_dimensions = embed_dimensions
         self.embedding_column = embedding_column
 
-    def build_filter_clause(self, filters) -> tuple[str, str]:
+    def build_filter_clause(self, filters: Optional[list[Filter]]) -> tuple[str, str]:
         if filters is None:
             return "", ""
         filter_clauses = []
         for filter in filters:
-            if isinstance(filter["value"], str):
-                filter["value"] = f"'{filter['value']}'"
-            filter_clauses.append(f"{filter['column']} {filter['comparison_operator']} {filter['value']}")
+            filter_value = f"'{filter.value}'" if isinstance(filter.value, str) else filter.value
+            filter_clauses.append(f"{filter.column} {filter.comparison_operator} {filter_value}")
         filter_clause = " AND ".join(filter_clauses)
         if len(filter_clause) > 0:
             return f"WHERE {filter_clause}", f"AND {filter_clause}"
         return "", ""
 
     async def search(
-        self, query_text: Optional[str], query_vector: list[float], top: int = 5, filters: Optional[list[dict]] = None
+        self,
+        query_text: Optional[str],
+        query_vector: list[float],
+        top: int = 5,
+        filters: Optional[list[Filter]] = None,
     ):
         filter_clause_where, filter_clause_and = self.build_filter_clause(filters)
         table_name = Item.__tablename__
@@ -106,7 +110,7 @@ async def search_and_embed(
         top: int = 5,
         enable_vector_search: bool = False,
         enable_text_search: bool = False,
-        filters: Optional[list[dict]] = None,
+        filters: Optional[list[Filter]] = None,
     ) -> list[Item]:
         """
         Search rows by query text. Optionally converts the query text to a vector if enable_vector_search is True.
 
@@ -1,5 +1,5 @@
 from collections.abc import AsyncGenerator
-from typing import Optional, TypedDict, Union
+from typing import Optional, Union
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam
@@ -11,51 +11,22 @@
 
 from fastapi_app.api_models import (
     AIChatRoles,
+    BrandFilter,
     ChatRequestOverrides,
+    Filter,
     ItemPublic,
     Message,
+    PriceFilter,
     RAGContext,
     RetrievalResponse,
     RetrievalResponseDelta,
+    SearchResults,
     ThoughtStep,
 )
 from fastapi_app.postgres_searcher import PostgresSearcher
 from fastapi_app.rag_base import ChatParams, RAGChatBase
 
 
-class PriceFilter(TypedDict):
-    column: str = "price"
-    """The column to filter on (always 'price' for this filter)"""
-
-    comparison_operator: str
-    """The operator for price comparison ('>', '<', '>=', '<=', '=')"""
-
-    value: float
-    """ The price value to compare against (e.g., 30.00) """
-
-
-class BrandFilter(TypedDict):
-    column: str = "brand"
-    """The column to filter on (always 'brand' for this filter)"""
-
-    comparison_operator: str
-    """The operator for brand comparison ('=' or '!=')"""
-
-    value: str
-    """The brand name to compare against (e.g., 'AirStrider')"""
-
-
-class SearchResults(TypedDict):
-    query: str
-    """The original search query"""
-
-    items: list[ItemPublic]
-    """List of items that match the search query and filters"""
-
-    filters: list[Union[PriceFilter, BrandFilter]]
-    """List of filters applied to the search results"""
-
-
 class AdvancedRAGChat(RAGChatBase):
     query_prompt_template = open(RAGChatBase.prompts_dir / "query.txt").read()
     query_fewshots = open(RAGChatBase.prompts_dir / "query_fewshots.json").read()
@@ -79,9 +50,13 @@ def __init__(
             chat_model if chat_deployment is None else chat_deployment,
             provider=OpenAIProvider(openai_client=openai_chat_client),
         )
-        self.search_agent = Agent(
+        self.search_agent = Agent[ChatParams, SearchResults](
             pydantic_chat_model,
-            model_settings=ModelSettings(temperature=0.0, max_tokens=500, seed=self.chat_params.seed),
+            model_settings=ModelSettings(
+                temperature=0.0,
+                max_tokens=500,
+                **({"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}),
+            ),
             system_prompt=self.query_prompt_template,
             tools=[self.search_database],
             output_type=SearchResults,
@@ -92,7 +67,7 @@ def __init__(
             model_settings=ModelSettings(
                 temperature=self.chat_params.temperature,
                 max_tokens=self.chat_params.response_token_limit,
-                seed=self.chat_params.seed,
+                **({"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}),
             ),
         )
 
@@ -115,7 +90,7 @@ async def search_database(
             List of formatted items that match the search query and filters
         """
         # Only send non-None filters
-        filters = []
+        filters: list[Filter] = []
         if price_filter:
             filters.append(price_filter)
         if brand_filter:
@@ -134,12 +109,12 @@ async def search_database(
     async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]:
         few_shots = ModelMessagesTypeAdapter.validate_json(self.query_fewshots)
         user_query = f"Find search results for user query: {self.chat_params.original_user_query}"
-        results = await self.search_agent.run(
+        results = await self.search_agent.run(  # type: ignore[call-overload]
             user_query,
             message_history=few_shots + self.chat_params.past_messages,
             deps=self.chat_params,
         )
-        items = results.output["items"]
+        items = results.output.items
         thoughts = [
             ThoughtStep(
                 title="Prompt to generate search arguments",
@@ -148,12 +123,12 @@ async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]:
             ),
             ThoughtStep(
                 title="Search using generated search arguments",
-                description=results.output["query"],
+                description=results.output.query,
                 props={
                     "top": self.chat_params.top,
                     "vector_search": self.chat_params.enable_vector_search,
                     "text_search": self.chat_params.enable_text_search,
-                    "filters": results.output["filters"],
+                    "filters": results.output.filters,
                 },
             ),
             ThoughtStep(
 
@@ -1,8 +1,10 @@
 import pathlib
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
+from typing import Union
 
 from openai.types.chat import ChatCompletionMessageParam
+from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart
 
 from fastapi_app.api_models import (
     ChatParams,
@@ -12,7 +14,6 @@
     RetrievalResponseDelta,
     ThoughtStep,
 )
-from fastapi_app.postgres_models import Item
 
 
 class RAGChatBase(ABC):
@@ -31,7 +32,19 @@ def get_chat_params(
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
-        past_messages = messages[:-1]
+
+        # Convert to PydanticAI format:
+        past_messages: list[Union[ModelRequest, ModelResponse]] = []
+        for message in messages[:-1]:
+            content = message["content"]
+            if not isinstance(content, str):
+                raise ValueError("All messages must have string content.")
+            if message["role"] == "user":
+                past_messages.append(ModelRequest(parts=[UserPromptPart(content=content)]))
+            elif message["role"] == "assistant":
+                past_messages.append(ModelResponse(parts=[TextPart(content=content)]))
+            else:
+                raise ValueError(f"Cannot convert message: {message}")
 
         return ChatParams(
             top=overrides.top,
@@ -48,9 +61,7 @@ def get_chat_params(
         )
 
     @abstractmethod
-    async def prepare_context(
-        self, chat_params: ChatParams
-    ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]:
+    async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]:
         raise NotImplementedError
 
     def prepare_rag_request(self, user_query, items: list[ItemPublic]) -> str:
@@ -60,19 +71,15 @@ def prepare_rag_request(self, user_query, items: list[ItemPublic]) -> str:
     @abstractmethod
     async def answer(
         self,
-        chat_params: ChatParams,
-        contextual_messages: list[ChatCompletionMessageParam],
-        results: list[Item],
+        items: list[ItemPublic],
         earlier_thoughts: list[ThoughtStep],
     ) -> RetrievalResponse:
         raise NotImplementedError
 
     @abstractmethod
     async def answer_stream(
         self,
-        chat_params: ChatParams,
-        contextual_messages: list[ChatCompletionMessageParam],
-        results: list[Item],
+        items: list[ItemPublic],
         earlier_thoughts: list[ThoughtStep],
     ) -> AsyncGenerator[RetrievalResponseDelta, None]:
         raise NotImplementedError
 
@@ -48,7 +48,7 @@ def __init__(
             model_settings=ModelSettings(
                 temperature=self.chat_params.temperature,
                 max_tokens=self.chat_params.response_token_limit,
-                seed=self.chat_params.seed,
+                **({"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}),
             ),
         )
 
 
@@ -19,7 +19,7 @@ dependencies = [
     "opentelemetry-instrumentation-sqlalchemy",
     "opentelemetry-instrumentation-aiohttp-client",
     "opentelemetry-instrumentation-openai",
-    "pydantic-ai"
+    "pydantic-ai-slim[openai]"
 ]
 
 [build-system]
Original file line number	Diff line number	Diff line change
`@@ -36,5 +36,6 @@`
`36`	`36`	`"htmlcov": true,`
`37`	`37`	`".mypy_cache": true,`
`38`	`38`	`".coverage": true`
`39`		`- }`
	`39`	`+ },`
	`40`	`+ "python.REPL.enableREPLSmartSend": false`
`40`	`41`	`}`
Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@ def __init__(`
`48`	`48`	`model_settings=ModelSettings(`
`49`	`49`	`temperature=self.chat_params.temperature,`
`50`	`50`	`max_tokens=self.chat_params.response_token_limit,`
`51`		`- seed=self.chat_params.seed,`
	`51`	`+ **({"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}),`
`52`	`52`	`),`
`53`	`53`	`)`
`54`	`54`
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,7 @@ dependencies = [`
`19`	`19`	`"opentelemetry-instrumentation-sqlalchemy",`
`20`	`20`	`"opentelemetry-instrumentation-aiohttp-client",`
`21`	`21`	`"opentelemetry-instrumentation-openai",`
`22`		`- "pydantic-ai"`
	`22`	`+ "pydantic-ai-slim[openai]"`
`23`	`23`	`]`
`24`	`24`
`25`	`25`	`[build-system]`