chroma-core · HammadB · Dec 4, 2025 · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026
diff --git a/chromadb/__init__.py b/chromadb/__init__.py
@@ -23,6 +23,7 @@
     Include,
     Metadata,
     Metadatas,
+    ReadLevel,
     Where,
     QueryResult,
     GetResult,

diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py
@@ -62,6 +62,7 @@
     IncludeMetadataDocuments,
     Loadable,
     Metadatas,
+    ReadLevel,
     Schema,
     URIs,
     Where,
@@ -713,6 +714,7 @@ def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         pass
 

diff --git a/chromadb/api/async_api.py b/chromadb/api/async_api.py
@@ -22,6 +22,7 @@
     IndexingStatus,
     Loadable,
     Metadatas,
+    ReadLevel,
     Schema,
     URIs,
     Where,
@@ -665,6 +666,7 @@ async def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         pass
 

diff --git a/chromadb/api/async_fastapi.py b/chromadb/api/async_fastapi.py
@@ -35,6 +35,7 @@
     IndexingStatus,
     Schema,
     Metadatas,
+    ReadLevel,
     URIs,
     Where,
     WhereDocument,
@@ -444,9 +445,13 @@ async def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         """Performs hybrid search on a collection"""
-        payload = {"searches": [s.to_dict() for s in searches]}
+        payload = {
+            "searches": [s.to_dict() for s in searches],
+            "read_level": read_level,
+        }
 
         resp_json = await self._make_request(
             "post",

diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py
@@ -29,6 +29,7 @@
     IndexingStatus,
     Schema,
     Metadatas,
+    ReadLevel,
     URIs,
     Where,
     WhereDocument,
@@ -407,10 +408,14 @@ def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         """Performs hybrid search on a collection"""
         # Convert Search objects to dictionaries
-        payload = {"searches": [s.to_dict() for s in searches]}
+        payload = {
+            "searches": [s.to_dict() for s in searches],
+            "read_level": read_level,
+        }
 
         resp_json = self._make_request(
             "post",

diff --git a/chromadb/api/models/AsyncCollection.py b/chromadb/api/models/AsyncCollection.py
@@ -16,6 +16,7 @@
     QueryResult,
     ID,
     OneOrMany,
+    ReadLevel,
     WhereDocument,
     SearchResult,
     maybe_cast_one_to_many,
@@ -311,6 +312,7 @@ async def fork(
     async def search(
         self,
         searches: OneOrMany[Search],
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         """Perform hybrid search on the collection.
         This is an experimental API that only works for Hosted Chroma for now.
@@ -321,6 +323,11 @@ async def search(
                 - rank: Ranking expression for hybrid search (defaults to Val(0.0))
                 - limit: Limit configuration for pagination (defaults to no limit)
                 - select: Select configuration for keys to return (defaults to empty)
+            read_level: Controls whether to read from the write-ahead log (WAL):
+                - ReadLevel.INDEX_AND_WAL: Read from both the compacted index and WAL (default).
+                  All committed writes will be visible.
+                - ReadLevel.INDEX_ONLY: Read only from the compacted index, skipping the WAL.
+                  Faster, but recent writes that haven't been compacted may not be visible.
 
         Returns:
             SearchResult: Column-major format response with:
@@ -368,6 +375,10 @@ async def search(
                 Search().where(K("type") == "paper").rank(Knn(query=[0.3, 0.4]))
             ]
             results = await collection.search(searches)
+
+            # Skip WAL for faster queries (may miss recent writes not yet compacted)
+            from chromadb.api.types import ReadLevel
+            result = await collection.search(search, read_level=ReadLevel.INDEX_ONLY)
         """
         # Convert single search to list for consistent handling
         searches_list = maybe_cast_one_to_many(searches)
@@ -384,6 +395,7 @@ async def search(
             searches=cast(List[Search], embedded_searches),
             tenant=self.tenant,
             database=self.database,
+            read_level=read_level,
         )
 
     async def update(

diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py
@@ -17,6 +17,7 @@
     QueryResult,
     ID,
     OneOrMany,
+    ReadLevel,
     WhereDocument,
     SearchResult,
     maybe_cast_one_to_many,
@@ -320,6 +321,7 @@ def fork(
     def search(
         self,
         searches: OneOrMany[Search],
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         """Perform hybrid search on the collection.
         This is an experimental API that only works for Hosted Chroma for now.
@@ -330,6 +332,11 @@ def search(
                 - rank: Ranking expression for hybrid search (defaults to Val(0.0))
                 - limit: Limit configuration for pagination (defaults to no limit)
                 - select: Select configuration for keys to return (defaults to empty)
+            read_level: Controls whether to read from the write-ahead log (WAL):
+                - ReadLevel.INDEX_AND_WAL: Read from both the compacted index and WAL (default).
+                  All committed writes will be visible.
+                - ReadLevel.INDEX_ONLY: Read only from the compacted index, skipping the WAL.
+                  Faster, but recent writes that haven't been compacted may not be visible.
 
         Returns:
             SearchResult: Column-major format response with:
@@ -377,6 +384,10 @@ def search(
                 Search().where(K("type") == "paper").rank(Knn(query=[0.3, 0.4]))
             ]
             results = collection.search(searches)
+
+            # Skip WAL for faster queries (may miss recent writes not yet compacted)
+            from chromadb.api.types import ReadLevel
+            result = collection.search(search, read_level=ReadLevel.INDEX_ONLY)
         """
         # Convert single search to list for consistent handling
         searches_list = maybe_cast_one_to_many(searches)
@@ -393,6 +404,7 @@ def search(
             searches=cast(List[Search], embedded_searches),
             tenant=self.tenant,
             database=self.database,
+            read_level=read_level,
         )
 
     def update(

diff --git a/chromadb/api/rust.py b/chromadb/api/rust.py
@@ -39,6 +39,7 @@
     IncludeMetadataDocuments,
     IncludeMetadataDocumentsDistances,
     IncludeMetadataDocumentsEmbeddings,
+    ReadLevel,
     Schema,
     SearchResult,
 )
@@ -341,9 +342,7 @@ def _get_indexing_status(
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
     ) -> "IndexingStatus":
-        raise NotImplementedError(
-            "Indexing status is not implemented for Local Chroma"
-        )
+        raise NotImplementedError("Indexing status is not implemented for Local Chroma")
 
     @override
     def _search(
@@ -352,6 +351,7 @@ def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         raise NotImplementedError("Search is not implemented for Local Chroma")
 

diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py
@@ -40,6 +40,7 @@
     Embeddings,
     Metadatas,
     Documents,
+    ReadLevel,
     Schema,
     URIs,
     Where,
@@ -439,9 +440,7 @@ def _get_indexing_status(
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
     ) -> "IndexingStatus":
-        raise NotImplementedError(
-            "Indexing status is not implemented for SegmentAPI"
-        )
+        raise NotImplementedError("Indexing status is not implemented for SegmentAPI")
 
     @override
     def _search(
@@ -450,6 +449,7 @@ def _search(
         searches: List[Search],
         tenant: str = DEFAULT_TENANT,
         database: str = DEFAULT_DATABASE,
+        read_level: ReadLevel = ReadLevel.INDEX_AND_WAL,
     ) -> SearchResult:
         raise NotImplementedError("Search is not implemented for SegmentAPI")
 

diff --git a/chromadb/api/types.py b/chromadb/api/types.py
@@ -760,6 +760,20 @@ class IndexMetadata(TypedDict):
 Space = Literal["cosine", "l2", "ip"]
 
 
+class ReadLevel(str, Enum):  # str mixin: members compare equal to their string values
+    """Controls whether search queries read from the write-ahead log (WAL).
+
+    Attributes:
+        INDEX_AND_WAL: Read from both the compacted index and the WAL (default).
+            All committed writes will be visible.
+        INDEX_ONLY: Read only from the compacted index, skipping the WAL.
+            Faster, but recent writes that haven't been compacted may not be visible.
+    """
+
+    INDEX_AND_WAL = "index_and_wal"  # default of every ``read_level`` parameter
+    INDEX_ONLY = "index_only"  # member is placed under "read_level" in the search payload
+
 # TODO: make warnings prettier and add link to migration docs
 @runtime_checkable
 class EmbeddingFunction(Protocol[D]):
@@ -776,7 +790,8 @@ class EmbeddingFunction(Protocol[D]):
     """
 
     @abstractmethod
-    def __call__(self, input: D) -> Embeddings: ...
+    def __call__(self, input: D) -> Embeddings:
+        ...
 
     def embed_query(self, input: D) -> Embeddings:
         """
@@ -960,7 +975,8 @@ def validate_embedding_function(
 
 
 class DataLoader(Protocol[L]):
-    def __call__(self, uris: URIs) -> L: ...
+    def __call__(self, uris: URIs) -> L:
+        ...
 
 
 def validate_ids(ids: IDs) -> IDs:
@@ -1417,7 +1433,8 @@ class SparseEmbeddingFunction(Protocol[D]):
     """
 
     @abstractmethod
-    def __call__(self, input: D) -> SparseVectors: ...
+    def __call__(self, input: D) -> SparseVectors:
+        ...
 
     def embed_query(self, input: D) -> SparseVectors:
         """
@@ -1611,9 +1628,9 @@ class VectorIndexConfig(BaseModel):
 
     space: Optional[Space] = None
     embedding_function: Optional[Any] = DefaultEmbeddingFunction()
-    source_key: Optional[str] = (
-        None  # key to source the vector from (accepts str or Key)
-    )
+    source_key: Optional[
+        str
+    ] = None  # key to source the vector from (accepts str or Key)
     hnsw: Optional[HnswIndexConfig] = None
     spann: Optional[SpannIndexConfig] = None
 
@@ -1662,9 +1679,9 @@ class SparseVectorIndexConfig(BaseModel):
 
     # TODO(Sanket): Change this to the appropriate sparse ef and use a default here.
     embedding_function: Optional[Any] = None
-    source_key: Optional[str] = (
-        None  # key to source the sparse vector from (accepts str or Key)
-    )
+    source_key: Optional[
+        str
+    ] = None  # key to source the sparse vector from (accepts str or Key)
     bm25: Optional[bool] = None
 
     @field_validator("source_key", mode="before")