diff --git a/.azdo/pipelines/azure-dev.yml b/.azdo/pipelines/azure-dev.yml index 24a3a70848..00d801010d 100644 --- a/.azdo/pipelines/azure-dev.yml +++ b/.azdo/pipelines/azure-dev.yml @@ -44,7 +44,6 @@ steps: AZURE_LOCATION: $(AZURE_LOCATION) AZD_INITIAL_ENVIRONMENT_CONFIG: $(AZD_INITIAL_ENVIRONMENT_CONFIG) AZURE_OPENAI_SERVICE: $(AZURE_OPENAI_SERVICE) - AZURE_OPENAI_API_VERSION: $(AZURE_OPENAI_API_VERSION) AZURE_OPENAI_LOCATION: $(AZURE_OPENAI_LOCATION) AZURE_OPENAI_RESOURCE_GROUP: $(AZURE_OPENAI_RESOURCE_GROUP) AZURE_DOCUMENTINTELLIGENCE_SERVICE: $(AZURE_DOCUMENTINTELLIGENCE_SERVICE) diff --git a/.github/workflows/azure-dev.yml b/.github/workflows/azure-dev.yml index f4723d6888..b9e5d3efa4 100644 --- a/.github/workflows/azure-dev.yml +++ b/.github/workflows/azure-dev.yml @@ -31,7 +31,6 @@ jobs: # project specific AZURE_OPENAI_SERVICE: ${{ vars.AZURE_OPENAI_SERVICE }} AZURE_OPENAI_LOCATION: ${{ vars.AZURE_OPENAI_LOCATION }} - AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }} AZURE_OPENAI_RESOURCE_GROUP: ${{ vars.AZURE_OPENAI_RESOURCE_GROUP }} AZURE_DOCUMENTINTELLIGENCE_SERVICE: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SERVICE }} AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }} diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml index 4a45b81712..28fc4a3cef 100644 --- a/.github/workflows/evaluate.yaml +++ b/.github/workflows/evaluate.yaml @@ -29,7 +29,6 @@ jobs: # project specific AZURE_OPENAI_SERVICE: ${{ vars.AZURE_OPENAI_SERVICE }} AZURE_OPENAI_LOCATION: ${{ vars.AZURE_OPENAI_LOCATION }} - AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }} AZURE_OPENAI_RESOURCE_GROUP: ${{ vars.AZURE_OPENAI_RESOURCE_GROUP }} AZURE_DOCUMENTINTELLIGENCE_SERVICE: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_SERVICE }} AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP: ${{ vars.AZURE_DOCUMENTINTELLIGENCE_RESOURCE_GROUP }} diff --git a/app/backend/app.py b/app/backend/app.py index f7b36794b4..d0771d73f1 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -90,7 +90,6 @@ from error import error_dict, error_response from prepdocs import ( OpenAIHost, - clean_key_if_exists, setup_embeddings_service, setup_file_processors, setup_image_embeddings_service, @@ -426,8 +425,6 @@ async def setup_clients(): os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM] else None ) AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL") - # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-10-21" AZURE_VISION_ENDPOINT = os.getenv("AZURE_VISION_ENDPOINT", "") AZURE_OPENAI_API_KEY_OVERRIDE = os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE") # Used only with non-Azure OpenAI deployments @@ -558,10 +555,9 @@ async def setup_clients(): # Wait until token is needed to fetch for the first time current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None - openai_client = setup_openai_client( + openai_client, azure_openai_endpoint = setup_openai_client( openai_host=OPENAI_HOST, azure_credential=azure_credential, - azure_openai_api_version=AZURE_OPENAI_API_VERSION, azure_openai_service=AZURE_OPENAI_SERVICE, azure_openai_custom_url=AZURE_OPENAI_CUSTOM_URL, azure_openai_api_key=AZURE_OPENAI_API_KEY_OVERRIDE, @@ -602,17 +598,12 @@ async def setup_clients(): search_service=AZURE_SEARCH_SERVICE, index_name=AZURE_SEARCH_INDEX, azure_credential=azure_credential ) text_embeddings_service = setup_embeddings_service( - azure_credential=azure_credential, - openai_host=OpenAIHost(OPENAI_HOST), + open_ai_client=openai_client, + openai_host=OPENAI_HOST, emb_model_name=OPENAI_EMB_MODEL, emb_model_dimensions=OPENAI_EMB_DIMENSIONS, - azure_openai_service=AZURE_OPENAI_SERVICE, - azure_openai_custom_url=AZURE_OPENAI_CUSTOM_URL, azure_openai_deployment=AZURE_OPENAI_EMB_DEPLOYMENT, - azure_openai_api_version=AZURE_OPENAI_API_VERSION, - azure_openai_key=clean_key_if_exists(AZURE_OPENAI_API_KEY_OVERRIDE), - openai_key=clean_key_if_exists(OPENAI_API_KEY), - openai_org=OPENAI_ORGANIZATION, + azure_openai_endpoint=azure_openai_endpoint, disable_vectors=os.getenv("USE_VECTORS", "").lower() == "false", ) image_embeddings_service = setup_image_embeddings_service( @@ -727,6 +718,7 @@ async def close_clients(): await current_app.config[CONFIG_GLOBAL_BLOB_MANAGER].close_clients() if user_blob_manager := current_app.config.get(CONFIG_USER_BLOB_MANAGER): await user_blob_manager.close_clients() + await current_app.config[CONFIG_CREDENTIAL].close() def create_app(): diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index 5debb5b537..254b3d64eb 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -2,6 +2,7 @@ import asyncio import logging import os +from collections.abc import Awaitable, Callable from enum import Enum from typing import Optional @@ -9,17 +10,13 @@ from azure.core.credentials import AzureKeyCredential from azure.core.credentials_async import AsyncTokenCredential from azure.identity.aio import AzureDeveloperCliCredential, get_bearer_token_provider -from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai import AsyncOpenAI from rich.logging import RichHandler from load_azd_env import load_azd_env from prepdocslib.blobmanager import BlobManager from prepdocslib.csvparser import CsvParser -from prepdocslib.embeddings import ( - AzureOpenAIEmbeddingService, - ImageEmbeddings, - OpenAIEmbeddingService, -) +from prepdocslib.embeddings import ImageEmbeddings, OpenAIEmbeddings from prepdocslib.fileprocessor import FileProcessor from prepdocslib.filestrategy import FileStrategy from prepdocslib.htmlparser import LocalHTMLParser @@ -160,17 +157,12 @@ class OpenAIHost(str, Enum): def setup_embeddings_service( - azure_credential: AsyncTokenCredential, + open_ai_client: AsyncOpenAI, openai_host: OpenAIHost, emb_model_name: str, emb_model_dimensions: int, - azure_openai_service: Optional[str], - azure_openai_custom_url: Optional[str], - azure_openai_deployment: Optional[str], - azure_openai_key: Optional[str], - azure_openai_api_version: str, - openai_key: Optional[str], - openai_org: Optional[str], + azure_openai_deployment: str | None, + azure_openai_endpoint: str | None, disable_vectors: bool = False, disable_batch_vectors: bool = False, ): @@ -179,70 +171,59 @@ def setup_embeddings_service( return None if openai_host in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]: - azure_open_ai_credential: AsyncTokenCredential | AzureKeyCredential = ( - azure_credential if azure_openai_key is None else AzureKeyCredential(azure_openai_key) - ) - return AzureOpenAIEmbeddingService( - open_ai_service=azure_openai_service, - open_ai_custom_url=azure_openai_custom_url, - open_ai_deployment=azure_openai_deployment, - open_ai_model_name=emb_model_name, - open_ai_dimensions=emb_model_dimensions, - open_ai_api_version=azure_openai_api_version, - credential=azure_open_ai_credential, - disable_batch=disable_batch_vectors, - ) - else: - if openai_key is None: - raise ValueError("OpenAI key is required when using the non-Azure OpenAI API") - return OpenAIEmbeddingService( - open_ai_model_name=emb_model_name, - open_ai_dimensions=emb_model_dimensions, - credential=openai_key, - organization=openai_org, - disable_batch=disable_batch_vectors, - ) + if azure_openai_endpoint is None: + raise ValueError("Azure OpenAI endpoint must be provided when using Azure OpenAI embeddings") + if azure_openai_deployment is None: + raise ValueError("Azure OpenAI deployment must be provided when using Azure OpenAI embeddings") + + return OpenAIEmbeddings( + open_ai_client=open_ai_client, + open_ai_model_name=emb_model_name, + open_ai_dimensions=emb_model_dimensions, + disable_batch=disable_batch_vectors, + azure_deployment_name=azure_openai_deployment, + azure_endpoint=azure_openai_endpoint, + ) def setup_openai_client( openai_host: OpenAIHost, azure_credential: AsyncTokenCredential, azure_openai_api_key: Optional[str] = None, - azure_openai_api_version: Optional[str] = None, azure_openai_service: Optional[str] = None, azure_openai_custom_url: Optional[str] = None, openai_api_key: Optional[str] = None, openai_organization: Optional[str] = None, -): - if openai_host not in OpenAIHost: - raise ValueError(f"Invalid OPENAI_HOST value: {openai_host}. Must be one of {[h.value for h in OpenAIHost]}.") - +) -> tuple[AsyncOpenAI, Optional[str]]: openai_client: AsyncOpenAI + azure_openai_endpoint: Optional[str] = None if openai_host in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]: + base_url: Optional[str] = None + api_key_or_token: Optional[str | Callable[[], Awaitable[str]]] = None if openai_host == OpenAIHost.AZURE_CUSTOM: logger.info("OPENAI_HOST is azure_custom, setting up Azure OpenAI custom client") if not azure_openai_custom_url: raise ValueError("AZURE_OPENAI_CUSTOM_URL must be set when OPENAI_HOST is azure_custom") - endpoint = azure_openai_custom_url + base_url = azure_openai_custom_url else: logger.info("OPENAI_HOST is azure, setting up Azure OpenAI client") if not azure_openai_service: raise ValueError("AZURE_OPENAI_SERVICE must be set when OPENAI_HOST is azure") - endpoint = f"https://{azure_openai_service}.openai.azure.com" + azure_openai_endpoint = f"https://{azure_openai_service}.openai.azure.com" + base_url = f"{azure_openai_endpoint}/openai/v1" if azure_openai_api_key: logger.info("AZURE_OPENAI_API_KEY_OVERRIDE found, using as api_key for Azure OpenAI client") - openai_client = AsyncAzureOpenAI( - api_version=azure_openai_api_version, azure_endpoint=endpoint, api_key=azure_openai_api_key - ) + api_key_or_token = azure_openai_api_key else: logger.info("Using Azure credential (passwordless authentication) for Azure OpenAI client") - token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default") - openai_client = AsyncAzureOpenAI( - api_version=azure_openai_api_version, - azure_endpoint=endpoint, - azure_ad_token_provider=token_provider, + api_key_or_token = get_bearer_token_provider( + azure_credential, "https://cognitiveservices.azure.com/.default" ) + openai_client = AsyncOpenAI( + base_url=base_url, + api_key=api_key_or_token, # type: ignore[arg-type] + ) elif openai_host == OpenAIHost.LOCAL: logger.info("OPENAI_HOST is local, setting up local OpenAI client for OPENAI_BASE_URL with no key") openai_client = AsyncOpenAI( @@ -259,7 +240,7 @@ def setup_openai_client( api_key=openai_api_key, organization=openai_organization, ) - return openai_client + return openai_client, azure_openai_endpoint def setup_file_processors( @@ -368,7 +349,7 @@ async def main(strategy: Strategy, setup_index: bool = True): await strategy.run() -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover parser = argparse.ArgumentParser( description="Prepare documents by extracting content from PDFs, splitting content into sections, uploading to blob storage, and indexing in a search index." ) @@ -516,41 +497,34 @@ async def main(strategy: Strategy, setup_index: bool = True): enable_global_documents=enable_global_documents, ) - # https://learn.microsoft.com/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-06-01" emb_model_dimensions = 1536 if os.getenv("AZURE_OPENAI_EMB_DIMENSIONS"): emb_model_dimensions = int(os.environ["AZURE_OPENAI_EMB_DIMENSIONS"]) - openai_embeddings_service = setup_embeddings_service( - azure_credential=azd_credential, - openai_host=OPENAI_HOST, - emb_model_name=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"], - emb_model_dimensions=emb_model_dimensions, - azure_openai_service=os.getenv("AZURE_OPENAI_SERVICE"), - azure_openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL"), - azure_openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"), - azure_openai_api_version=azure_openai_api_version, - azure_openai_key=os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE"), - openai_key=clean_key_if_exists(os.getenv("OPENAI_API_KEY")), - openai_org=os.getenv("OPENAI_ORGANIZATION"), - disable_vectors=dont_use_vectors, - disable_batch_vectors=args.disablebatchvectors, - ) - openai_client = setup_openai_client( + + openai_client, azure_openai_endpoint = setup_openai_client( openai_host=OPENAI_HOST, azure_credential=azd_credential, - azure_openai_api_version=azure_openai_api_version, azure_openai_service=os.getenv("AZURE_OPENAI_SERVICE"), azure_openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL"), azure_openai_api_key=os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE"), openai_api_key=clean_key_if_exists(os.getenv("OPENAI_API_KEY")), openai_organization=os.getenv("OPENAI_ORGANIZATION"), ) + openai_embeddings_service = setup_embeddings_service( + open_ai_client=openai_client, + openai_host=OPENAI_HOST, + emb_model_name=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"], + emb_model_dimensions=emb_model_dimensions, + azure_openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT"), + azure_openai_endpoint=azure_openai_endpoint, + disable_vectors=dont_use_vectors, + disable_batch_vectors=args.disablebatchvectors, + ) ingestion_strategy: Strategy if use_int_vectorization: - if not openai_embeddings_service or not isinstance(openai_embeddings_service, AzureOpenAIEmbeddingService): + if not openai_embeddings_service or OPENAI_HOST not in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]: raise Exception("Integrated vectorization strategy requires an Azure OpenAI embeddings service") ingestion_strategy = IntegratedVectorizerStrategy( diff --git a/app/backend/prepdocslib/embeddings.py b/app/backend/prepdocslib/embeddings.py index fe70da2b1f..ba7a60fcf0 100644 --- a/app/backend/prepdocslib/embeddings.py +++ b/app/backend/prepdocslib/embeddings.py @@ -1,15 +1,11 @@ import logging from abc import ABC from collections.abc import Awaitable, Callable -from typing import Optional from urllib.parse import urljoin import aiohttp import tiktoken -from azure.core.credentials import AzureKeyCredential -from azure.core.credentials_async import AsyncTokenCredential -from azure.identity.aio import get_bearer_token_provider -from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError +from openai import AsyncOpenAI, RateLimitError from tenacity import ( AsyncRetrying, retry_if_exception_type, @@ -22,9 +18,7 @@ class EmbeddingBatch: - """ - Represents a batch of text that is going to be embedded - """ + """Represents a batch of text that is going to be embedded.""" def __init__(self, texts: list[str], token_length: int): self.texts = texts @@ -36,12 +30,9 @@ class ExtraArgs(TypedDict, total=False): class OpenAIEmbeddings(ABC): - """ - Contains common logic across both OpenAI and Azure OpenAI embedding services - Can split source text into batches for more efficient embedding calls - """ + """Client wrapper that handles batching, retries, and token accounting.""" - SUPPORTED_BATCH_AOAI_MODEL = { + SUPPORTED_BATCH_MODEL = { "text-embedding-ada-002": {"token_limit": 8100, "max_batch_size": 16}, "text-embedding-3-small": {"token_limit": 8100, "max_batch_size": 16}, "text-embedding-3-large": {"token_limit": 8100, "max_batch_size": 16}, @@ -52,13 +43,26 @@ class OpenAIEmbeddings(ABC): "text-embedding-3-large": True, } - def __init__(self, open_ai_model_name: str, open_ai_dimensions: int, disable_batch: bool = False): + def __init__( + self, + open_ai_client: AsyncOpenAI, + open_ai_model_name: str, + open_ai_dimensions: int, + *, + disable_batch: bool = False, + azure_deployment_name: str | None = None, + azure_endpoint: str | None = None, + ): + self.open_ai_client = open_ai_client self.open_ai_model_name = open_ai_model_name self.open_ai_dimensions = open_ai_dimensions self.disable_batch = disable_batch + self.azure_deployment_name = azure_deployment_name + self.azure_endpoint = azure_endpoint.rstrip("/") if azure_endpoint else None - async def create_client(self) -> AsyncOpenAI: - raise NotImplementedError + @property + def _api_model(self) -> str: + return self.azure_deployment_name or self.open_ai_model_name def before_retry_sleep(self, retry_state): logger.info("Rate limited on the OpenAI embeddings API, sleeping before retrying...") @@ -68,7 +72,7 @@ def calculate_token_length(self, text: str): return len(encoding.encode(text)) def split_text_into_batches(self, texts: list[str]) -> list[EmbeddingBatch]: - batch_info = OpenAIEmbeddings.SUPPORTED_BATCH_AOAI_MODEL.get(self.open_ai_model_name) + batch_info = OpenAIEmbeddings.SUPPORTED_BATCH_MODEL.get(self.open_ai_model_name) if not batch_info: raise NotImplementedError( f"Model {self.open_ai_model_name} is not supported with batch embedding operations" @@ -101,7 +105,6 @@ def split_text_into_batches(self, texts: list[str]) -> list[EmbeddingBatch]: async def create_embedding_batch(self, texts: list[str], dimensions_args: ExtraArgs) -> list[list[float]]: batches = self.split_text_into_batches(texts) embeddings = [] - client = await self.create_client() for batch in batches: async for attempt in AsyncRetrying( retry=retry_if_exception_type(RateLimitError), @@ -110,8 +113,8 @@ async def create_embedding_batch(self, texts: list[str], dimensions_args: ExtraA before_sleep=self.before_retry_sleep, ): with attempt: - emb_response = await client.embeddings.create( - model=self.open_ai_model_name, input=batch.texts, **dimensions_args + emb_response = await self.open_ai_client.embeddings.create( + model=self._api_model, input=batch.texts, **dimensions_args ) embeddings.extend([data.embedding for data in emb_response.data]) logger.info( @@ -123,7 +126,6 @@ async def create_embedding_batch(self, texts: list[str], dimensions_args: ExtraA return embeddings async def create_embedding_single(self, text: str, dimensions_args: ExtraArgs) -> list[float]: - client = await self.create_client() async for attempt in AsyncRetrying( retry=retry_if_exception_type(RateLimitError), wait=wait_random_exponential(min=15, max=60), @@ -131,8 +133,8 @@ async def create_embedding_single(self, text: str, dimensions_args: ExtraArgs) - before_sleep=self.before_retry_sleep, ): with attempt: - emb_response = await client.embeddings.create( - model=self.open_ai_model_name, input=text, **dimensions_args + emb_response = await self.open_ai_client.embeddings.create( + model=self._api_model, input=text, **dimensions_args ) logger.info("Computed embedding for text section. Character count: %d", len(text)) @@ -146,86 +148,12 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]: else {} ) - if not self.disable_batch and self.open_ai_model_name in OpenAIEmbeddings.SUPPORTED_BATCH_AOAI_MODEL: + if not self.disable_batch and self.open_ai_model_name in OpenAIEmbeddings.SUPPORTED_BATCH_MODEL: return await self.create_embedding_batch(texts, dimensions_args) return [await self.create_embedding_single(text, dimensions_args) for text in texts] -class AzureOpenAIEmbeddingService(OpenAIEmbeddings): - """ - Class for using Azure OpenAI embeddings - To learn more please visit https://learn.microsoft.com/azure/ai-services/openai/concepts/understand-embeddings - """ - - def __init__( - self, - open_ai_service: Optional[str], - open_ai_deployment: Optional[str], - open_ai_model_name: str, - open_ai_dimensions: int, - open_ai_api_version: str, - credential: AsyncTokenCredential | AzureKeyCredential, - open_ai_custom_url: Optional[str] = None, - disable_batch: bool = False, - ): - super().__init__(open_ai_model_name, open_ai_dimensions, disable_batch) - self.open_ai_service = open_ai_service - if open_ai_service: - self.open_ai_endpoint = f"https://{open_ai_service}.openai.azure.com" - elif open_ai_custom_url: - self.open_ai_endpoint = open_ai_custom_url - else: - raise ValueError("Either open_ai_service or open_ai_custom_url must be provided") - self.open_ai_deployment = open_ai_deployment - self.open_ai_api_version = open_ai_api_version - self.credential = credential - - async def create_client(self) -> AsyncOpenAI: - class AuthArgs(TypedDict, total=False): - api_key: str - azure_ad_token_provider: Callable[[], str | Awaitable[str]] - - auth_args = AuthArgs() - if isinstance(self.credential, AzureKeyCredential): - auth_args["api_key"] = self.credential.key - elif isinstance(self.credential, AsyncTokenCredential): - auth_args["azure_ad_token_provider"] = get_bearer_token_provider( - self.credential, "https://cognitiveservices.azure.com/.default" - ) - else: - raise TypeError("Invalid credential type") - - return AsyncAzureOpenAI( - azure_endpoint=self.open_ai_endpoint, - azure_deployment=self.open_ai_deployment, - api_version=self.open_ai_api_version, - **auth_args, - ) - - -class OpenAIEmbeddingService(OpenAIEmbeddings): - """ - Class for using OpenAI embeddings - To learn more please visit https://platform.openai.com/docs/guides/embeddings - """ - - def __init__( - self, - open_ai_model_name: str, - open_ai_dimensions: int, - credential: str, - organization: Optional[str] = None, - disable_batch: bool = False, - ): - super().__init__(open_ai_model_name, open_ai_dimensions, disable_batch) - self.credential = credential - self.organization = organization - - async def create_client(self) -> AsyncOpenAI: - return AsyncOpenAI(api_key=self.credential, organization=self.organization) - - class ImageEmbeddings: """ Class for using image embeddings from Azure AI Vision diff --git a/app/backend/prepdocslib/integratedvectorizerstrategy.py b/app/backend/prepdocslib/integratedvectorizerstrategy.py index 11e826c3e4..589b8d9888 100644 --- a/app/backend/prepdocslib/integratedvectorizerstrategy.py +++ b/app/backend/prepdocslib/integratedvectorizerstrategy.py @@ -21,7 +21,7 @@ ) from .blobmanager import BlobManager -from .embeddings import AzureOpenAIEmbeddingService +from .embeddings import OpenAIEmbeddings from .listfilestrategy import ListFileStrategy from .searchmanager import SearchManager from .strategy import DocumentAction, SearchInfo, Strategy @@ -29,7 +29,7 @@ logger = logging.getLogger("scripts") -class IntegratedVectorizerStrategy(Strategy): +class IntegratedVectorizerStrategy(Strategy): # pragma: no cover """ Strategy for ingesting and vectorizing documents into a search service from files stored storage account """ @@ -39,7 +39,7 @@ def __init__( list_file_strategy: ListFileStrategy, blob_manager: BlobManager, search_info: SearchInfo, - embeddings: AzureOpenAIEmbeddingService, + embeddings: OpenAIEmbeddings, search_field_name_embedding: str, subscription_id: str, document_action: DocumentAction = DocumentAction.Add, @@ -83,12 +83,15 @@ async def create_embedding_skill(self, index_name: str) -> SearchIndexerSkillset outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")], ) + if not self.embeddings.azure_endpoint or not self.embeddings.azure_deployment_name: + raise ValueError("Integrated vectorization requires Azure OpenAI endpoint and deployment") + embedding_skill = AzureOpenAIEmbeddingSkill( name="embedding-skill", description="Skill to generate embeddings via Azure OpenAI", context="/document/pages/*", - resource_url=f"https://{self.embeddings.open_ai_service}.openai.azure.com", - deployment_name=self.embeddings.open_ai_deployment, + resource_url=self.embeddings.azure_endpoint, + deployment_name=self.embeddings.azure_deployment_name, model_name=self.embeddings.open_ai_model_name, dimensions=self.embeddings.open_ai_dimensions, inputs=[ diff --git a/app/backend/prepdocslib/searchmanager.py b/app/backend/prepdocslib/searchmanager.py index 0fa6a20d96..59455fa87b 100644 --- a/app/backend/prepdocslib/searchmanager.py +++ b/app/backend/prepdocslib/searchmanager.py @@ -38,7 +38,7 @@ ) from .blobmanager import BlobManager -from .embeddings import AzureOpenAIEmbeddingService, OpenAIEmbeddings +from .embeddings import OpenAIEmbeddings from .listfilestrategy import File from .strategy import SearchInfo from .textsplitter import Chunk @@ -109,12 +109,12 @@ async def create_index(self): ) text_vectorizer = None - if isinstance(self.embeddings, AzureOpenAIEmbeddingService): + if self.embeddings.azure_endpoint and self.embeddings.azure_deployment_name: text_vectorizer = AzureOpenAIVectorizer( vectorizer_name=f"{self.embeddings.open_ai_model_name}-vectorizer", parameters=AzureOpenAIVectorizerParameters( - resource_url=self.embeddings.open_ai_endpoint, - deployment_name=self.embeddings.open_ai_deployment, + resource_url=self.embeddings.azure_endpoint, + deployment_name=self.embeddings.azure_deployment_name, model_name=self.embeddings.open_ai_model_name, ), ) @@ -426,14 +426,18 @@ async def create_index(self): existing_index.vector_search.vectorizers is None or len(existing_index.vector_search.vectorizers) == 0 ): - if self.embeddings is not None and isinstance(self.embeddings, AzureOpenAIEmbeddingService): + if ( + self.embeddings is not None + and self.embeddings.azure_endpoint + and self.embeddings.azure_deployment_name + ): logger.info("Adding vectorizer to search index %s", self.search_info.index_name) existing_index.vector_search.vectorizers = [ AzureOpenAIVectorizer( vectorizer_name=f"{self.search_info.index_name}-vectorizer", parameters=AzureOpenAIVectorizerParameters( - resource_url=self.embeddings.open_ai_endpoint, - deployment_name=self.embeddings.open_ai_deployment, + resource_url=self.embeddings.azure_endpoint, + deployment_name=self.embeddings.azure_deployment_name, model_name=self.embeddings.open_ai_model_name, ), ) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index 0a32dd6d5c..756a857192 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -1,8 +1,7 @@ azure-identity quart quart-cors -flask==3.1.2 -openai>=1.3.7 +openai>=1.109.1 tiktoken tenacity azure-ai-documentintelligence==1.0.0b4 diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index ab502bb225..5eab109d1f 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -103,9 +103,7 @@ exceptiongroup==1.3.0 fixedint==0.1.6 # via azure-monitor-opentelemetry-exporter flask==3.1.2 - # via - # -r requirements.in - # quart + # via quart frozenlist==1.4.1 # via # aiohttp @@ -209,7 +207,7 @@ multidict==6.7.0 # yarl oauthlib==3.3.1 # via requests-oauthlib -openai==1.99.8 +openai==2.6.1 # via -r requirements.in opentelemetry-api==1.38.0 # via @@ -383,7 +381,6 @@ six==1.16.0 sniffio==1.3.1 # via # anyio - # httpx # openai soupsieve==2.7 # via beautifulsoup4 @@ -394,9 +391,7 @@ taskgroup==0.2.2 tenacity==9.1.2 # via -r requirements.in tiktoken==0.12.0 - # via - # -r requirements.in - # opentelemetry-instrumentation-openai + # via -r requirements.in tomli==2.2.1 # via hypercorn tqdm==4.66.5 diff --git a/docs/reasoning.md b/docs/reasoning.md index 45dbb3a99e..3067e977da 100644 --- a/docs/reasoning.md +++ b/docs/reasoning.md @@ -31,7 +31,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT gpt-5 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-08-07 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For gpt-5-mini: @@ -41,7 +40,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT gpt-5-mini azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-08-07 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For gpt-5-nano: @@ -51,7 +49,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT gpt-5-nano azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-08-07 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For o4-mini: @@ -61,7 +58,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o4-mini azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-04-16 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For o3: @@ -71,7 +67,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o3 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-04-16 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For o3-mini: (No vision support) @@ -81,7 +76,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o4-mini azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-04-16 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2025-04-01-preview ``` For o1: (No streaming support) @@ -91,7 +85,6 @@ This repository includes an optional feature that uses reasoning models to gener azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o1 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2024-12-17 azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard - azd env set AZURE_OPENAI_API_VERSION 2024-12-01-preview ``` 2. **(Optional) Set default reasoning effort** diff --git a/infra/main.bicep b/infra/main.bicep index 030acab981..65b4893662 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -116,7 +116,6 @@ param openAiHost string // Set in main.parameters.json param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') param deployAzureOpenAi bool = openAiHost == 'azure' param azureOpenAiCustomUrl string = '' -param azureOpenAiApiVersion string = '' @secure() param azureOpenAiApiKey string = '' param azureOpenAiDisableKeys bool = true @@ -502,7 +501,6 @@ var appEnvVariables = { AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName AZURE_OPENAI_SEARCHAGENT_MODEL: searchAgent.modelName AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT: searchAgent.deploymentName - AZURE_OPENAI_API_VERSION: azureOpenAiApiVersion AZURE_OPENAI_API_KEY_OVERRIDE: azureOpenAiApiKey AZURE_OPENAI_CUSTOM_URL: azureOpenAiCustomUrl // Used only with non-Azure OpenAI deployments @@ -1406,7 +1404,6 @@ output AZURE_OPENAI_CHATGPT_MODEL string = chatGpt.modelName // Specific to Azure OpenAI output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : '' output AZURE_OPENAI_ENDPOINT string = isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.endpoint : '' -output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' output AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION string = isAzureOpenAiHost ? chatGpt.deploymentVersion : '' diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 7a637c8022..606bbb4915 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -173,9 +173,6 @@ "azureOpenAiCustomUrl": { "value": "${AZURE_OPENAI_CUSTOM_URL}" }, - "azureOpenAiApiVersion": { - "value": "${AZURE_OPENAI_API_VERSION}" - }, "azureOpenAiApiKey": { "value": "${AZURE_OPENAI_API_KEY_OVERRIDE}" }, diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 2d89108e85..0f94defbe3 100644 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -16,8 +16,10 @@ def minimal_env(monkeypatch): monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service") monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service") monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-4.1-mini") + monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "test-chat-deployment") monkeypatch.setenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-3-large") monkeypatch.setenv("AZURE_OPENAI_EMB_DIMENSIONS", "3072") + monkeypatch.setenv("AZURE_OPENAI_EMB_DEPLOYMENT", "test-emb-deployment") yield @@ -41,7 +43,7 @@ async def test_app_azure_custom_key(monkeypatch, minimal_env): quart_app = app.create_app() async with quart_app.test_app(): assert quart_app.config[app.CONFIG_OPENAI_CLIENT].api_key == "azure-api-key" - assert quart_app.config[app.CONFIG_OPENAI_CLIENT].base_url == "http://azureapi.com/api/v1/openai/" + assert quart_app.config[app.CONFIG_OPENAI_CLIENT].base_url == "http://azureapi.com/api/v1/" @pytest.mark.asyncio @@ -51,8 +53,11 @@ async def test_app_azure_custom_identity(monkeypatch, minimal_env): quart_app = app.create_app() async with quart_app.test_app(): - assert quart_app.config[app.CONFIG_OPENAI_CLIENT].api_key == "" - assert quart_app.config[app.CONFIG_OPENAI_CLIENT].base_url == "http://azureapi.com/api/v1/openai/" + openai_client = quart_app.config[app.CONFIG_OPENAI_CLIENT] + assert openai_client.api_key == "" + # The AsyncOpenAI client stores the callable inside _api_key_provider + assert getattr(openai_client, "_api_key_provider", None) is not None + assert str(openai_client.base_url) == "http://azureapi.com/api/v1/" @pytest.mark.asyncio diff --git a/tests/test_prepdocs.py b/tests/test_prepdocs.py index 77cb29c0be..795d18c2b8 100644 --- a/tests/test_prepdocs.py +++ b/tests/test_prepdocs.py @@ -11,11 +11,7 @@ from openai.types.create_embedding_response import Usage import prepdocs -from prepdocslib.embeddings import ( - AzureOpenAIEmbeddingService, - ImageEmbeddings, - OpenAIEmbeddingService, -) +from prepdocslib.embeddings import ImageEmbeddings, OpenAIEmbeddings from .mocks import ( MOCK_EMBEDDING_DIMENSIONS, @@ -38,74 +34,30 @@ def __init__(self, embeddings_client): @pytest.mark.asyncio -async def test_compute_embedding_success(monkeypatch): - async def mock_create_client(*args, **kwargs): - # From https://platform.openai.com/docs/api-reference/embeddings/create - return MockClient( - embeddings_client=MockEmbeddingsClient( - create_embedding_response=openai.types.CreateEmbeddingResponse( - object="list", - data=[ - openai.types.Embedding( - embedding=[ - 0.0023064255, - -0.009327292, - -0.0028842222, - ], - index=0, - object="embedding", - ) - ], - model="text-embedding-3-large", - usage=Usage(prompt_tokens=8, total_tokens=8), - ) +async def test_compute_embedding_success(): + response = openai.types.CreateEmbeddingResponse( + object="list", + data=[ + openai.types.Embedding( + embedding=[ + 0.0023064255, + -0.009327292, + -0.0028842222, + ], + index=0, + object="embedding", ) - ) - - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", - open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, - open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), - disable_batch=False, + ], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=8, total_tokens=8), ) - monkeypatch.setattr(embeddings, "create_client", mock_create_client) - assert await embeddings.create_embeddings(texts=["foo"]) == [ - [ - 0.0023064255, - -0.009327292, - -0.0028842222, - ] - ] - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(MockEmbeddingsClient(response)), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), - disable_batch=True, - ) - monkeypatch.setattr(embeddings, "create_client", mock_create_client) - assert await embeddings.create_embeddings(texts=["foo"]) == [ - [ - 0.0023064255, - -0.009327292, - -0.0028842222, - ] - ] - - embeddings = OpenAIEmbeddingService( - open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, - open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - credential=MockAzureCredential(), - organization="org", disable_batch=False, ) - monkeypatch.setattr(embeddings, "create_client", mock_create_client) assert await embeddings.create_embeddings(texts=["foo"]) == [ [ 0.0023064255, @@ -114,14 +66,12 @@ async def mock_create_client(*args, **kwargs): ] ] - embeddings = OpenAIEmbeddingService( + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(MockEmbeddingsClient(response)), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - credential=MockAzureCredential(), - organization="org", disable_batch=True, ) - monkeypatch.setattr(embeddings, "create_client", mock_create_client) assert await embeddings.create_embeddings(texts=["foo"]) == [ [ 0.0023064255, @@ -149,18 +99,17 @@ async def create_rate_limit_client(*args, **kwargs): @pytest.mark.asyncio async def test_compute_embedding_ratelimiterror_batch(monkeypatch, caplog): with caplog.at_level(logging.INFO): - monkeypatch.setattr(tenacity.wait_random_exponential, "__call__", lambda x, y: 0) + monkeypatch.setattr( + "prepdocslib.embeddings.wait_random_exponential", + lambda *args, **kwargs: tenacity.wait_fixed(0), + ) with pytest.raises(tenacity.RetryError): - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(RateLimitMockEmbeddingsClient()), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), disable_batch=False, ) - monkeypatch.setattr(embeddings, "create_client", create_rate_limit_client) await embeddings.create_embeddings(texts=["foo"]) assert caplog.text.count("Rate limited on the OpenAI embeddings API") == 14 @@ -168,18 +117,17 @@ async def test_compute_embedding_ratelimiterror_batch(monkeypatch, caplog): @pytest.mark.asyncio async def test_compute_embedding_ratelimiterror_single(monkeypatch, caplog): with caplog.at_level(logging.INFO): - monkeypatch.setattr(tenacity.wait_random_exponential, "__call__", lambda x, y: 0) + monkeypatch.setattr( + "prepdocslib.embeddings.wait_random_exponential", + lambda *args, **kwargs: tenacity.wait_fixed(0), + ) with pytest.raises(tenacity.RetryError): - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(RateLimitMockEmbeddingsClient()), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), disable_batch=True, ) - monkeypatch.setattr(embeddings, "create_client", create_rate_limit_client) await embeddings.create_embeddings(texts=["foo"]) assert caplog.text.count("Rate limited on the OpenAI embeddings API") == 14 @@ -189,37 +137,28 @@ async def create(self, *args, **kwargs) -> openai.types.CreateEmbeddingResponse: raise openai.AuthenticationError(message="Bad things happened.", response=fake_response(403), body=None) -async def create_auth_error_limit_client(*args, **kwargs): - return MockClient(embeddings_client=AuthenticationErrorMockEmbeddingsClient()) - - @pytest.mark.asyncio -async def test_compute_embedding_autherror(monkeypatch, capsys): - monkeypatch.setattr(tenacity.wait_random_exponential, "__call__", lambda x, y: 0) +async def test_compute_embedding_autherror(monkeypatch): + monkeypatch.setattr( + "prepdocslib.embeddings.wait_random_exponential", + lambda *args, **kwargs: tenacity.wait_fixed(0), + ) with pytest.raises(openai.AuthenticationError): - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(AuthenticationErrorMockEmbeddingsClient()), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), disable_batch=False, ) - monkeypatch.setattr(embeddings, "create_client", create_auth_error_limit_client) await embeddings.create_embeddings(texts=["foo"]) with pytest.raises(openai.AuthenticationError): - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(AuthenticationErrorMockEmbeddingsClient()), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=MockAzureCredential(), disable_batch=True, ) - monkeypatch.setattr(embeddings, "create_client", create_auth_error_limit_client) await embeddings.create_embeddings(texts=["foo"]) @@ -329,53 +268,105 @@ def __init__( assert captured["enable_global_documents"] is True -@pytest.mark.asyncio -async def test_azure_embedding_service_create_client_uses_token_provider( - monkeypatch: pytest.MonkeyPatch, -) -> None: - async def fake_provider() -> str: - return "token" +def test_setup_embeddings_service_populates_azure_metadata() -> None: + embeddings = prepdocs.setup_embeddings_service( + open_ai_client=MockClient( + MockEmbeddingsClient( + openai.types.CreateEmbeddingResponse( + object="list", + data=[], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=0, total_tokens=0), + ) + ) + ), + openai_host=prepdocs.OpenAIHost.AZURE, + emb_model_name=MOCK_EMBEDDING_MODEL_NAME, + emb_model_dimensions=MOCK_EMBEDDING_DIMENSIONS, + azure_openai_deployment="deployment", + azure_openai_endpoint="https://service.openai.azure.com", + ) - def fake_get_bearer_token_provider(credential: object, scope: str): - assert scope == "https://cognitiveservices.azure.com/.default" - return fake_provider + assert isinstance(embeddings, OpenAIEmbeddings) + assert embeddings.azure_deployment_name == "deployment" + assert embeddings.azure_endpoint == "https://service.openai.azure.com" + + +def test_setup_embeddings_service_requires_endpoint_for_azure() -> None: + with pytest.raises(ValueError): + prepdocs.setup_embeddings_service( + open_ai_client=MockClient( + MockEmbeddingsClient( + openai.types.CreateEmbeddingResponse( + object="list", + data=[], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=0, total_tokens=0), + ) + ) + ), + openai_host=prepdocs.OpenAIHost.AZURE, + emb_model_name=MOCK_EMBEDDING_MODEL_NAME, + emb_model_dimensions=MOCK_EMBEDDING_DIMENSIONS, + azure_openai_deployment="deployment", + azure_openai_endpoint=None, + ) - captured: dict[str, object] = {} - class StubAsyncAzureOpenAI: - def __init__( - self, - *, - azure_endpoint: str, - azure_deployment: str | None, - api_version: str, - **auth_args: object, - ) -> None: - captured["endpoint"] = azure_endpoint - captured["deployment"] = azure_deployment - captured["api_version"] = api_version - captured["auth_args"] = auth_args +def test_setup_embeddings_service_requires_deployment_for_azure() -> None: + with pytest.raises(ValueError): + prepdocs.setup_embeddings_service( + open_ai_client=MockClient( + MockEmbeddingsClient( + openai.types.CreateEmbeddingResponse( + object="list", + data=[], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=0, total_tokens=0), + ) + ) + ), + openai_host=prepdocs.OpenAIHost.AZURE, + emb_model_name=MOCK_EMBEDDING_MODEL_NAME, + emb_model_dimensions=MOCK_EMBEDDING_DIMENSIONS, + azure_openai_deployment=None, + azure_openai_endpoint="https://service.openai.azure.com", + ) - monkeypatch.setattr("prepdocslib.embeddings.get_bearer_token_provider", fake_get_bearer_token_provider) - monkeypatch.setattr("prepdocslib.embeddings.AsyncAzureOpenAI", StubAsyncAzureOpenAI) - service = AzureOpenAIEmbeddingService( - open_ai_service="service", - open_ai_deployment="deployment", +@pytest.mark.asyncio +async def test_openai_embeddings_use_deployment_for_azure_model(): + class RecordingEmbeddingsClient: + def __init__(self) -> None: + self.models: list[str] = [] + + async def create(self, *, model: str, input, **kwargs): + self.models.append(model) + data = [ + openai.types.Embedding(embedding=[0.1, 0.2, 0.3], index=i, object="embedding") + for i, _ in enumerate(input) + ] + return openai.types.CreateEmbeddingResponse( + object="list", + data=data, + model=model, + usage=Usage(prompt_tokens=0, total_tokens=0), + ) + + recording_client = RecordingEmbeddingsClient() + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(recording_client), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="2024-06-01", - credential=MockAzureCredential(), + disable_batch=False, + azure_deployment_name="azure-deployment", + azure_endpoint="https://service.openai.azure.com", ) - client = await service.create_client() + result = await embeddings.create_embeddings(["foo"]) - assert isinstance(client, StubAsyncAzureOpenAI) - assert captured["endpoint"] == "https://service.openai.azure.com" - assert "azure_ad_token_provider" in captured["auth_args"] - provider = captured["auth_args"]["azure_ad_token_provider"] - assert callable(provider) - assert await provider() == "token" + assert recording_client.models == ["azure-deployment"] + assert len(result) == 1 @pytest.mark.asyncio @@ -433,3 +424,99 @@ async def run(self) -> None: assert captured["credentials"].key == "secret" assert captured["service_name"] == "searchsvc" assert captured["index_name"] == "searchindex" + + +def test_setup_openai_client_azure_constructs_endpoint_correctly(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that setup_openai_client correctly constructs the Azure OpenAI endpoint URL from service name.""" + captured_base_url: list[str] = [] + + class StubAsyncOpenAI: + def __init__(self, *, base_url: str, api_key, **kwargs) -> None: + captured_base_url.append(base_url) + + monkeypatch.setattr(prepdocs, "AsyncOpenAI", StubAsyncOpenAI) + monkeypatch.setattr(prepdocs, "get_bearer_token_provider", lambda *args, **kwargs: lambda: "fake_token") + + client, endpoint = prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.AZURE, + azure_credential=MockAzureCredential(), + azure_openai_service="myopenaiservice", + ) + + # Verify the endpoint is constructed correctly + assert endpoint == "https://myopenaiservice.openai.azure.com" + # Verify the base_url includes the endpoint with the openai/v1 suffix + assert captured_base_url[0] == "https://myopenaiservice.openai.azure.com/openai/v1" + + +def test_setup_openai_client_azure_custom_uses_custom_url(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that setup_openai_client uses the custom URL for azure_custom host.""" + captured_base_url: list[str] = [] + + class StubAsyncOpenAI: + def __init__(self, *, base_url: str, api_key, **kwargs) -> None: + captured_base_url.append(base_url) + + monkeypatch.setattr(prepdocs, "AsyncOpenAI", StubAsyncOpenAI) + + client, endpoint = prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.AZURE_CUSTOM, + azure_credential=MockAzureCredential(), + azure_openai_custom_url="https://custom.endpoint.com/openai", + azure_openai_api_key="test-key", + ) + + # Verify the custom URL is used + assert captured_base_url[0] == "https://custom.endpoint.com/openai" + # Verify endpoint is None for custom URLs + assert endpoint is None + + +def test_setup_openai_client_azure_respects_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that setup_openai_client uses the API key override when provided.""" + captured_api_key: list[str] = [] + + class StubAsyncOpenAI: + def __init__(self, *, base_url: str, api_key: str, **kwargs) -> None: + captured_api_key.append(api_key) + + monkeypatch.setattr(prepdocs, "AsyncOpenAI", StubAsyncOpenAI) + + client, endpoint = prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.AZURE, + azure_credential=MockAzureCredential(), + azure_openai_service="myopenaiservice", + azure_openai_api_key="my-api-key-override", + ) + + assert captured_api_key[0] == "my-api-key-override" + + +def test_setup_openai_client_openai_requires_api_key() -> None: + """Test that setup_openai_client raises ValueError when using OpenAI without API key.""" + with pytest.raises(ValueError, match="OpenAI key is required"): + prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.OPENAI, + azure_credential=MockAzureCredential(), + openai_api_key=None, + ) + + +def test_setup_openai_client_azure_requires_service() -> None: + """Test that setup_openai_client raises ValueError when using Azure without service name.""" + with pytest.raises(ValueError, match="AZURE_OPENAI_SERVICE must be set"): + prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.AZURE, + azure_credential=MockAzureCredential(), + azure_openai_service=None, + ) + + +def test_setup_openai_client_azure_custom_requires_url() -> None: + """Test that setup_openai_client raises ValueError when using azure_custom without custom URL.""" + with pytest.raises(ValueError, match="AZURE_OPENAI_CUSTOM_URL must be set"): + prepdocs.setup_openai_client( + openai_host=prepdocs.OpenAIHost.AZURE_CUSTOM, + azure_credential=MockAzureCredential(), + azure_openai_custom_url=None, + ) diff --git a/tests/test_searchmanager.py b/tests/test_searchmanager.py index 6cf7b6d10a..5aeb31685d 100644 --- a/tests/test_searchmanager.py +++ b/tests/test_searchmanager.py @@ -12,10 +12,11 @@ SearchIndex, SearchIndexPermissionFilterOption, SimpleField, + VectorSearch, ) from openai.types.create_embedding_response import Usage -from prepdocslib.embeddings import AzureOpenAIEmbeddingService +from prepdocslib.embeddings import OpenAIEmbeddings from prepdocslib.listfilestrategy import File from prepdocslib.page import ImageOnPage from prepdocslib.searchmanager import SearchManager, Section @@ -155,6 +156,73 @@ async def mock_create_or_update_index(self, index, *args, **kwargs): assert updated_indexes[0].fields[0].name == "storageUrl" +@pytest.mark.asyncio +async def test_create_index_adds_vectorizer_to_existing_index(monkeypatch, search_info): + """Test that a vectorizer is added to an existing index when embeddings are configured.""" + created_indexes = [] + updated_indexes = [] + + async def mock_create_index(self, index): + created_indexes.append(index) # pragma: no cover + + async def mock_list_index_names(self): + yield "test" + + async def mock_get_index(self, *args, **kwargs): + # Return an existing index with vector_search but no vectorizers + # Include embedding field to avoid triggering the embedding field addition code path + return SearchIndex( + name="test", + fields=[ + SimpleField( + name="storageUrl", + type=SearchFieldDataType.String, + filterable=True, + ), + SimpleField( + name="embedding", + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + searchable=True, + vector_search_dimensions=MOCK_EMBEDDING_DIMENSIONS, + ), + ], + vector_search=VectorSearch(vectorizers=[]), + ) + + async def mock_create_or_update_index(self, index, *args, **kwargs): + updated_indexes.append(index) + + monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index) + monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names) + monkeypatch.setattr(SearchIndexClient, "get_index", mock_get_index) + monkeypatch.setattr(SearchIndexClient, "create_or_update_index", mock_create_or_update_index) + + # Create a simple mock embeddings object with just the properties we need for index creation + class MockEmbeddings: + def __init__(self): + self.azure_endpoint = "https://test.openai.azure.com" + self.azure_deployment_name = "test-deployment" + self.open_ai_model_name = MOCK_EMBEDDING_MODEL_NAME + self.open_ai_dimensions = MOCK_EMBEDDING_DIMENSIONS + + embeddings = MockEmbeddings() + + manager = SearchManager(search_info, embeddings=embeddings, field_name_embedding="embedding") + await manager.create_index() + + assert len(created_indexes) == 0, "It should not have created a new index" + assert len(updated_indexes) == 1, "It should have updated the existing index" + assert updated_indexes[0].vector_search.vectorizers is not None + assert len(updated_indexes[0].vector_search.vectorizers) == 1, "Should have added one vectorizer" + # The vectorizer name for updating existing indexes uses index_name + assert updated_indexes[0].vector_search.vectorizers[0].vectorizer_name == "test-vectorizer" + # Verify the vectorizer parameters + vectorizer = updated_indexes[0].vector_search.vectorizers[0] + assert vectorizer.parameters.resource_url == "https://test.openai.azure.com" + assert vectorizer.parameters.deployment_name == "test-deployment" + assert vectorizer.parameters.model_name == MOCK_EMBEDDING_MODEL_NAME + + @pytest.mark.asyncio async def test_create_index_acls(monkeypatch, search_info): indexes = [] @@ -510,28 +578,22 @@ async def mock_upload_documents(self, documents): @pytest.mark.asyncio async def test_update_content_with_embeddings(monkeypatch, search_info): - async def mock_create_client(*args, **kwargs): - # From https://platform.openai.com/docs/api-reference/embeddings/create - return MockClient( - embeddings_client=MockEmbeddingsClient( - create_embedding_response=openai.types.CreateEmbeddingResponse( - object="list", - data=[ - openai.types.Embedding( - embedding=[ - 0.0023064255, - -0.009327292, - -0.0028842222, - ], - index=0, - object="embedding", - ) - ], - model="text-embedding-3-large", - usage=Usage(prompt_tokens=8, total_tokens=8), - ) + response = openai.types.CreateEmbeddingResponse( + object="list", + data=[ + openai.types.Embedding( + embedding=[ + 0.0023064255, + -0.009327292, + -0.0028842222, + ], + index=0, + object="embedding", ) - ) + ], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=8, total_tokens=8), + ) documents_uploaded = [] @@ -539,16 +601,14 @@ async def mock_upload_documents(self, documents): documents_uploaded.extend(documents) monkeypatch.setattr(SearchClient, "upload_documents", mock_upload_documents) - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(MockEmbeddingsClient(response)), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=AzureKeyCredential("test"), disable_batch=True, + azure_deployment_name="x", + azure_endpoint="https://x.openai.azure.com", ) - monkeypatch.setattr(embeddings, "create_client", mock_create_client) manager = SearchManager( search_info, embeddings=embeddings, @@ -943,14 +1003,29 @@ async def mock_list_index_names(self): ) # Create embeddings service - embeddings = AzureOpenAIEmbeddingService( - open_ai_service="x", - open_ai_deployment="x", + response = openai.types.CreateEmbeddingResponse( + object="list", + data=[ + openai.types.Embedding( + embedding=[ + 0.0023064255, + -0.009327292, + -0.0028842222, + ], + index=0, + object="embedding", + ) + ], + model="text-embedding-3-large", + usage=Usage(prompt_tokens=8, total_tokens=8), + ) + embeddings = OpenAIEmbeddings( + open_ai_client=MockClient(MockEmbeddingsClient(response)), open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME, open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS, - open_ai_api_version="test-api-version", - credential=AzureKeyCredential("test"), disable_batch=True, + azure_deployment_name="x", + azure_endpoint="https://x.openai.azure.com", ) # Create a SearchManager with both search_images and embeddings diff --git a/tests/test_upload.py b/tests/test_upload.py index 41290625a7..75c576971c 100644 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -6,16 +6,9 @@ import pytest from azure.search.documents.aio import SearchClient from azure.storage.filedatalake.aio import DataLakeDirectoryClient, DataLakeFileClient -from openai.types.create_embedding_response import ( - CreateEmbeddingResponse, - Embedding, - Usage, -) from quart.datastructures import FileStorage -from prepdocslib.embeddings import AzureOpenAIEmbeddingService - -from .mocks import MockClient, MockEmbeddingsClient +from prepdocslib.embeddings import OpenAIEmbeddings @pytest.mark.asyncio @@ -65,28 +58,8 @@ async def mock_upload_file(self, *args, **kwargs): monkeypatch.setattr(DataLakeFileClient, "upload_data", mock_upload_file) - async def mock_create_client(self, *args, **kwargs): - # From https://platform.openai.com/docs/api-reference/embeddings/create - return MockClient( - embeddings_client=MockEmbeddingsClient( - create_embedding_response=CreateEmbeddingResponse( - object="list", - data=[ - Embedding( - embedding=[ - 0.0023064255, - -0.009327292, - -0.0028842222, - ], - index=0, - object="embedding", - ) - ], - model="text-embedding-3-large", - usage=Usage(prompt_tokens=8, total_tokens=8), - ) - ) - ) + async def mock_create_embeddings(self, texts): + return [[0.0023064255, -0.009327292, -0.0028842222] for _ in texts] documents_uploaded = [] @@ -94,7 +67,7 @@ async def mock_upload_documents(self, documents): documents_uploaded.extend(documents) monkeypatch.setattr(SearchClient, "upload_documents", mock_upload_documents) - monkeypatch.setattr(AzureOpenAIEmbeddingService, "create_client", mock_create_client) + monkeypatch.setattr(OpenAIEmbeddings, "create_embeddings", mock_create_embeddings) response = await auth_client.post( "/upload",