Skip to content

Commit 537ea60

Browse files
committed
fix: address PR review comments on serialization helpers
- Sanitize Redis URLs by removing passwords before serialization - Add _sanitize_redis_url() helper function - Replace password with *** while preserving username - Move to_dict() and to_yaml() to BaseSearchIndex to avoid duplication - Handle None _redis_url by omitting from output instead of writing None - Move import yaml to top of file instead of inline import - Add overwrite parameter to to_yaml() for API consistency - When overwrite=False, raises FileExistsError if file exists - Update tests to cover new functionality Fixes review comments from Copilot/Cursor on PR redis#525
1 parent 9ebc8c0 commit 537ea60

File tree

2 files changed

+196
-86
lines changed

2 files changed

+196
-86
lines changed

redisvl/index/index.py

Lines changed: 89 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
cast,
2222
)
2323

24+
import yaml
25+
from urllib.parse import urlparse, urlunparse
26+
2427
import redis.exceptions
2528

2629
# Add missing imports
@@ -107,6 +110,38 @@
107110
_HYBRID_SEARCH_ERROR_MESSAGE = "Hybrid search is not available in this version of redis-py. Please upgrade to redis-py >= 7.1.0."
108111

109112

113+
def _sanitize_redis_url(url: Optional[str]) -> Optional[str]:
114+
"""Remove password from Redis URL for safe serialization.
115+
116+
Args:
117+
url: Redis URL that may contain a password.
118+
119+
Returns:
120+
URL with password replaced by '***', or None if url is None.
121+
122+
Example:
123+
>>> _sanitize_redis_url("redis://:secret@localhost:6379")
124+
'redis://***@localhost:6379'
125+
"""
126+
if not url:
127+
return None
128+
try:
129+
parsed = urlparse(url)
130+
if parsed.password:
131+
# Replace password with *** but keep username if present
132+
if parsed.username:
133+
netloc = f"{parsed.username}:***@{parsed.hostname}"
134+
else:
135+
netloc = f"***@{parsed.hostname}"
136+
if parsed.port:
137+
netloc = f"{netloc}:{parsed.port}"
138+
return urlunparse(parsed._replace(netloc=netloc))
139+
return url
140+
except Exception:
141+
# If parsing fails, return a safe placeholder
142+
return "***"
143+
144+
110145
REQUIRED_MODULES_FOR_INTROSPECTION = [
111146
{"name": "search", "ver": 20810},
112147
{"name": "searchlight", "ver": 20810},
@@ -400,6 +435,60 @@ def key(self, id: str) -> str:
400435
key_separator=self.schema.index.key_separator,
401436
)
402437

438+
def to_dict(self, include_connection: bool = False) -> Dict[str, Any]:
439+
"""Serialize the index configuration to a dictionary.
440+
441+
Args:
442+
include_connection (bool, optional): Whether to include connection
443+
parameters. Defaults to False for security (passwords/URLs
444+
are excluded by default).
445+
446+
Returns:
447+
Dict[str, Any]: Dictionary representation of the index configuration.
448+
449+
Example:
450+
>>> config = index.to_dict()
451+
>>> new_index = SearchIndex.from_dict(config)
452+
"""
453+
config = self.schema.to_dict()
454+
if include_connection:
455+
# Sanitize the Redis URL to remove password before serialization
456+
sanitized_url = _sanitize_redis_url(self._redis_url)
457+
if sanitized_url is not None:
458+
config["_redis_url"] = sanitized_url
459+
# Note: connection_kwargs may contain sensitive info
460+
# Only include non-sensitive keys
461+
safe_keys = {"decode_responses", "ssl", "socket_timeout", "socket_connect_timeout"}
462+
connection_kwargs = getattr(self, "_connection_kwargs", {}) or {}
463+
config["_connection_kwargs"] = {
464+
k: v for k, v in connection_kwargs.items()
465+
if k in safe_keys
466+
}
467+
return config
468+
469+
def to_yaml(self, path: str, include_connection: bool = False, overwrite: bool = True) -> None:
470+
"""Serialize the index configuration to a YAML file.
471+
472+
Args:
473+
path (str): Path to write the YAML file.
474+
include_connection (bool, optional): Whether to include connection
475+
parameters. Defaults to False for security.
476+
overwrite (bool, optional): Whether to overwrite existing file.
477+
Defaults to True. If False and file exists, raises FileExistsError.
478+
479+
Example:
480+
>>> index.to_yaml("schemas/my_index.yaml")
481+
482+
Raises:
483+
FileExistsError: If overwrite=False and file already exists.
484+
"""
485+
import os
486+
if not overwrite and os.path.exists(path):
487+
raise FileExistsError(f"File already exists: {path}")
488+
config = self.to_dict(include_connection=include_connection)
489+
with open(path, "w") as f:
490+
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
491+
403492

404493
class SearchIndex(BaseSearchIndex):
405494
"""A search index class for interacting with Redis as a vector database.
@@ -1294,49 +1383,6 @@ def info(self, name: Optional[str] = None) -> Dict[str, Any]:
12941383
index_name = name or self.schema.index.name
12951384
return self._info(index_name, self._redis_client)
12961385

1297-
def to_dict(self, include_connection: bool = False) -> Dict[str, Any]:
1298-
"""Serialize the index configuration to a dictionary.
1299-
1300-
Args:
1301-
include_connection (bool, optional): Whether to include connection
1302-
parameters. Defaults to False for security (passwords/URLs
1303-
are excluded by default).
1304-
1305-
Returns:
1306-
Dict[str, Any]: Dictionary representation of the index configuration.
1307-
1308-
Example:
1309-
>>> config = index.to_dict()
1310-
>>> new_index = SearchIndex.from_dict(config)
1311-
"""
1312-
config = self.schema.to_dict()
1313-
if include_connection:
1314-
config["_redis_url"] = self._redis_url
1315-
# Note: connection_kwargs may contain sensitive info
1316-
# Only include non-sensitive keys
1317-
safe_keys = {"decode_responses", "ssl", "socket_timeout", "socket_connect_timeout"}
1318-
config["_connection_kwargs"] = {
1319-
k: v for k, v in self._connection_kwargs.items()
1320-
if k in safe_keys
1321-
}
1322-
return config
1323-
1324-
def to_yaml(self, path: str, include_connection: bool = False) -> None:
1325-
"""Serialize the index configuration to a YAML file.
1326-
1327-
Args:
1328-
path (str): Path to write the YAML file.
1329-
include_connection (bool, optional): Whether to include connection
1330-
parameters. Defaults to False for security.
1331-
1332-
Example:
1333-
>>> index.to_yaml("schemas/my_index.yaml")
1334-
"""
1335-
import yaml
1336-
config = self.to_dict(include_connection=include_connection)
1337-
with open(path, "w") as f:
1338-
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
1339-
13401386
def __enter__(self):
13411387
return self
13421388

@@ -2294,49 +2340,6 @@ def disconnect_sync(self):
22942340
return
22952341
sync_wrapper(self.disconnect)()
22962342

2297-
def to_dict(self, include_connection: bool = False) -> Dict[str, Any]:
2298-
"""Serialize the index configuration to a dictionary.
2299-
2300-
Args:
2301-
include_connection (bool, optional): Whether to include connection
2302-
parameters. Defaults to False for security (passwords/URLs
2303-
are excluded by default).
2304-
2305-
Returns:
2306-
Dict[str, Any]: Dictionary representation of the index configuration.
2307-
2308-
Example:
2309-
>>> config = index.to_dict()
2310-
>>> new_index = AsyncSearchIndex.from_dict(config)
2311-
"""
2312-
config = self.schema.to_dict()
2313-
if include_connection:
2314-
config["_redis_url"] = self._redis_url
2315-
# Note: connection_kwargs may contain sensitive info
2316-
# Only include non-sensitive keys
2317-
safe_keys = {"decode_responses", "ssl", "socket_timeout", "socket_connect_timeout"}
2318-
config["_connection_kwargs"] = {
2319-
k: v for k, v in self._connection_kwargs.items()
2320-
if k in safe_keys
2321-
}
2322-
return config
2323-
2324-
def to_yaml(self, path: str, include_connection: bool = False) -> None:
2325-
"""Serialize the index configuration to a YAML file.
2326-
2327-
Args:
2328-
path (str): Path to write the YAML file.
2329-
include_connection (bool, optional): Whether to include connection
2330-
parameters. Defaults to False for security.
2331-
2332-
Example:
2333-
>>> await index.to_yaml("schemas/my_index.yaml")
2334-
"""
2335-
import yaml
2336-
config = self.to_dict(include_connection=include_connection)
2337-
with open(path, "w") as f:
2338-
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
2339-
23402343
async def __aenter__(self):
23412344
return self
23422345

tests/unit/test_index_serialization.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,46 @@ def test_to_dict_with_connection(self, sample_schema):
5959
assert config["_connection_kwargs"]["ssl"] is True
6060
assert config["_connection_kwargs"]["socket_timeout"] == 30
6161

62+
def test_to_dict_sanitizes_password(self, sample_schema):
63+
"""Test to_dict() sanitizes passwords from redis_url."""
64+
index = SearchIndex(
65+
schema=sample_schema,
66+
redis_url="redis://:mysecretpassword@localhost:6379",
67+
)
68+
69+
config = index.to_dict(include_connection=True)
70+
71+
assert "_redis_url" in config
72+
# Password should be sanitized
73+
assert "mysecretpassword" not in config["_redis_url"]
74+
assert "***" in config["_redis_url"]
75+
76+
def test_to_dict_sanitizes_password_with_username(self, sample_schema):
77+
"""Test to_dict() sanitizes passwords but keeps username."""
78+
index = SearchIndex(
79+
schema=sample_schema,
80+
redis_url="redis://user:pass@localhost:6379",
81+
)
82+
83+
config = index.to_dict(include_connection=True)
84+
85+
assert "_redis_url" in config
86+
assert "pass" not in config["_redis_url"]
87+
assert "user" in config["_redis_url"]
88+
assert "***" in config["_redis_url"]
89+
90+
def test_to_dict_none_url_omitted(self, sample_schema):
91+
"""Test to_dict() omits _redis_url when it's None."""
92+
index = SearchIndex(
93+
schema=sample_schema,
94+
redis_url=None,
95+
)
96+
97+
config = index.to_dict(include_connection=True)
98+
99+
# _redis_url should be omitted, not set to None
100+
assert "_redis_url" not in config
101+
62102
def test_to_yaml_without_connection(self, sample_schema):
63103
"""Test to_yaml() writes schema to YAML file."""
64104
index = SearchIndex(schema=sample_schema)
@@ -94,6 +134,35 @@ def test_to_yaml_with_connection(self, sample_schema):
94134
finally:
95135
Path(path).unlink()
96136

137+
def test_to_yaml_overwrite_false_raises(self, sample_schema):
138+
"""Test to_yaml() raises FileExistsError when overwrite=False."""
139+
index = SearchIndex(schema=sample_schema)
140+
141+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
142+
path = f.name
143+
144+
try:
145+
with pytest.raises(FileExistsError):
146+
index.to_yaml(path, overwrite=False)
147+
finally:
148+
Path(path).unlink()
149+
150+
def test_to_yaml_overwrite_true(self, sample_schema):
151+
"""Test to_yaml() overwrites when overwrite=True."""
152+
index = SearchIndex(schema=sample_schema)
153+
154+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
155+
path = f.name
156+
157+
try:
158+
# Should not raise
159+
index.to_yaml(path, overwrite=True)
160+
161+
content = Path(path).read_text()
162+
assert "test_index" in content
163+
finally:
164+
Path(path).unlink()
165+
97166
def test_roundtrip_dict(self, sample_schema):
98167
"""Test that to_dict() and from_dict() roundtrip correctly."""
99168
original_index = SearchIndex(
@@ -138,6 +207,31 @@ def test_to_dict_with_connection(self, sample_schema):
138207
# Password should not be included (not in safe_keys)
139208
assert "password" not in config.get("_connection_kwargs", {})
140209

210+
def test_to_dict_sanitizes_password(self, sample_schema):
211+
"""Test to_dict() sanitizes passwords from redis_url."""
212+
index = AsyncSearchIndex(
213+
schema=sample_schema,
214+
redis_url="redis://:secret@localhost:6379",
215+
)
216+
217+
config = index.to_dict(include_connection=True)
218+
219+
assert "_redis_url" in config
220+
assert "secret" not in config["_redis_url"]
221+
assert "***" in config["_redis_url"]
222+
223+
def test_to_dict_none_url_omitted(self, sample_schema):
224+
"""Test to_dict() omits _redis_url when it's None."""
225+
index = AsyncSearchIndex(
226+
schema=sample_schema,
227+
redis_url=None,
228+
)
229+
230+
config = index.to_dict(include_connection=True)
231+
232+
# _redis_url should be omitted, not set to None
233+
assert "_redis_url" not in config
234+
141235
def test_to_yaml(self, sample_schema):
142236
"""Test to_yaml() writes schema to YAML file."""
143237
index = AsyncSearchIndex(schema=sample_schema)
@@ -153,6 +247,19 @@ def test_to_yaml(self, sample_schema):
153247
finally:
154248
Path(path).unlink()
155249

250+
def test_to_yaml_overwrite_false_raises(self, sample_schema):
251+
"""Test to_yaml() raises FileExistsError when overwrite=False."""
252+
index = AsyncSearchIndex(schema=sample_schema)
253+
254+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
255+
path = f.name
256+
257+
try:
258+
with pytest.raises(FileExistsError):
259+
index.to_yaml(path, overwrite=False)
260+
finally:
261+
Path(path).unlink()
262+
156263
def test_roundtrip_dict(self, sample_schema):
157264
"""Test that to_dict() and from_dict() roundtrip correctly."""
158265
original_index = AsyncSearchIndex(

0 commit comments

Comments
 (0)