|
8 | 8 | from pymongo.collection import Collection |
9 | 9 |
|
10 | 10 | from pymongo_search_utils.index import ( |
| 11 | + create_fulltext_search_index, |
11 | 12 | create_vector_search_index, |
12 | 13 | drop_vector_search_index, |
13 | 14 | is_index_ready, |
14 | 15 | update_vector_search_index, |
| 16 | + vector_search_index_definition, |
| 17 | + wait_for_docs_in_index, |
| 18 | + wait_for_predicate, |
15 | 19 | ) |
16 | 20 |
|
# Database / collection / index names shared by the search-index
# integration tests in this module.
DB_NAME = "pymongo_search_utils_test"
COLLECTION_NAME = "test_index"
VECTOR_INDEX_NAME = "vector_index"
FULLTEXT_INDEX_NAME = "fulltext_index"

# Maximum seconds to wait for Atlas Search index operations to complete.
TIMEOUT = 120
# Dimensionality of the test embedding vectors.
DIMENSIONS = 10
@@ -114,3 +119,186 @@ def test_search_index_update_vector_search_index(collection: Collection) -> None |
114 | 119 | assert len(indexes) == 1 |
115 | 120 | assert indexes[0]["name"] == index_name |
116 | 121 | assert indexes[0]["latestDefinition"]["fields"][0]["similarity"] == similarity_new |
| 122 | + |
| 123 | + |
def test_vector_search_index_definition() -> None:
    """Test the vector_search_index_definition helper function."""
    # Basic definition: exactly one vector field, no filters.
    basic = vector_search_index_definition(
        dimensions=128, path="embedding", similarity="cosine"
    )
    assert "fields" in basic
    assert len(basic["fields"]) == 1
    vector_field = basic["fields"][0]
    assert vector_field["numDimensions"] == 128
    assert vector_field["path"] == "embedding"
    assert vector_field["similarity"] == "cosine"
    assert vector_field["type"] == "vector"

    # Each filter path contributes one extra "filter" field after the vector.
    filtered = vector_search_index_definition(
        dimensions=256, path="vector", similarity="euclidean", filters=["category", "status"]
    )
    filtered_fields = filtered["fields"]
    assert len(filtered_fields) == 3
    assert filtered_fields[0]["type"] == "vector"
    assert filtered_fields[1]["type"] == "filter"
    assert filtered_fields[1]["path"] == "category"
    assert filtered_fields[2]["type"] == "filter"
    assert filtered_fields[2]["path"] == "status"

    # vector_index_options entries are merged into the vector field itself.
    quantized = vector_search_index_definition(
        dimensions=512,
        path="embed",
        similarity="dotProduct",
        vector_index_options={"quantization": {"type": "scalar"}},
    )
    assert quantized["fields"][0]["quantization"] == {"type": "scalar"}

    # Extra keyword arguments land at the top level of the definition.
    with_extras = vector_search_index_definition(
        dimensions=64, path="vec", similarity="cosine", storedSource=True
    )
    assert with_extras["storedSource"] is True
| 162 | + |
| 163 | + |
def test_wait_for_predicate() -> None:
    """Test the wait_for_predicate utility function."""
    import time

    # A predicate that becomes true on its third call should complete the
    # wait well inside the generous timeout.
    calls: list[None] = []

    def eventually_true() -> bool:
        calls.append(None)
        return len(calls) >= 3

    started = time.monotonic()
    wait_for_predicate(eventually_true, "Should not timeout", timeout=5, interval=0.1)
    duration = time.monotonic() - started
    assert len(calls) >= 3
    assert duration < 5

    # A predicate that never succeeds must raise TimeoutError carrying the
    # caller-supplied message.
    with pytest.raises(TimeoutError, match="Predicate failed"):
        wait_for_predicate(lambda: False, "Predicate failed", timeout=0.5, interval=0.1)
| 187 | + |
| 188 | + |
def test_create_fulltext_search_index_single_field(collection: Collection) -> None:
    """Test creating a fulltext search index on a single field."""
    index_name = FULLTEXT_INDEX_NAME
    field = "description"
    wait_until_complete = TIMEOUT

    # Start from a clean slate: drop any search indexes left behind by
    # other tests so the count assertion below is meaningful.
    for existing in collection.list_search_indexes():
        drop_vector_search_index(
            collection, existing["name"], wait_until_complete=wait_until_complete
        )

    # Build a fulltext (Atlas "search") index over the single field.
    create_fulltext_search_index(
        collection=collection,
        index_name=index_name,
        field=field,
        wait_until_complete=wait_until_complete,
    )

    # The index should be queryable, typed "search", and map exactly the
    # requested field with dynamic mapping turned off.
    assert is_index_ready(collection, index_name)
    created = list(collection.list_search_indexes())
    assert len(created) == 1
    assert created[0]["name"] == index_name
    assert created[0]["type"] == "search"
    mappings = created[0]["latestDefinition"]["mappings"]
    assert mappings["dynamic"] is False
    assert field in mappings["fields"]

    # Tear down so later tests see an empty index list.
    drop_vector_search_index(collection, index_name, wait_until_complete=wait_until_complete)
| 220 | + |
| 221 | + |
def test_create_fulltext_search_index_multiple_fields(collection: Collection) -> None:
    """Test creating a fulltext search index on multiple fields."""
    index_name = "fulltext_multi_index"
    fields = ["title", "description", "content"]
    wait_until_complete = TIMEOUT

    # Drop pre-existing search indexes so the count assertion below holds.
    for existing in collection.list_search_indexes():
        drop_vector_search_index(
            collection, existing["name"], wait_until_complete=wait_until_complete
        )

    # Passing a list as `field` should index every named field.
    create_fulltext_search_index(
        collection=collection,
        index_name=index_name,
        field=fields,
        wait_until_complete=wait_until_complete,
    )

    assert is_index_ready(collection, index_name)
    created = list(collection.list_search_indexes())
    assert len(created) == 1
    assert created[0]["name"] == index_name
    assert created[0]["type"] == "search"

    # Every requested field must be mapped as a string field.
    mapped = created[0]["latestDefinition"]["mappings"]["fields"]
    for name in fields:
        assert name in mapped
        assert mapped[name] == [{"type": "string"}]

    # Tear down so later tests see an empty index list.
    drop_vector_search_index(collection, index_name, wait_until_complete=wait_until_complete)
| 257 | + |
| 258 | + |
def test_wait_for_docs_in_index(collection: Collection) -> None:
    """Test waiting for documents to be indexed in a vector search index.

    Inserts a known number of documents with embeddings and asserts that
    ``wait_for_docs_in_index`` reports them all as indexed.
    """
    index_name = "wait_docs_index"
    dimensions = DIMENSIONS
    path = "embedding"
    similarity = "cosine"
    wait_until_complete = TIMEOUT

    # Clean up existing indexes so only the index under test exists.
    for index_info in collection.list_search_indexes():
        drop_vector_search_index(
            collection, index_info["name"], wait_until_complete=wait_until_complete
        )
    # Remove leftover documents: the fixed _id values below would otherwise
    # raise DuplicateKeyError on reruns, and stray documents would skew the
    # indexed-document count the wait is checking for.
    collection.delete_many({})

    # Create vector search index
    create_vector_search_index(
        collection=collection,
        index_name=index_name,
        dimensions=dimensions,
        path=path,
        similarity=similarity,
        wait_until_complete=wait_until_complete,
    )

    # Insert test documents with embeddings
    n_docs = 5
    docs = [{"_id": i, path: [0.1] * dimensions, "text": f"doc {i}"} for i in range(n_docs)]
    collection.insert_many(docs)

    try:
        # Wait for documents to be indexed
        result = wait_for_docs_in_index(collection, index_name, n_docs)
        assert result is True
    finally:
        # Clean up even when the assertion fails, so later tests start from
        # an empty collection with no search indexes.
        collection.delete_many({})
        drop_vector_search_index(
            collection, index_name, wait_until_complete=wait_until_complete
        )
| 294 | + |
| 295 | + |
def test_wait_for_docs_in_index_nonexistent(collection: Collection) -> None:
    """Test wait_for_docs_in_index raises error for non-existent index."""
    # Drop every search index so the queried name is guaranteed missing.
    for existing in collection.list_search_indexes():
        drop_vector_search_index(collection, existing["name"], wait_until_complete=TIMEOUT)

    # Asking about an index that does not exist must fail with ValueError.
    with pytest.raises(ValueError, match="does not exist"):
        wait_for_docs_in_index(collection, "nonexistent_index", 1)
0 commit comments