diff --git a/examples.py b/examples.py index 935a6f15..fb3be7bc 100755 --- a/examples.py +++ b/examples.py @@ -6,7 +6,7 @@ import json import logging import sys -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, List, Optional from unittest.mock import patch @@ -448,9 +448,7 @@ def display_hidemyemail(api: PyiCloudService) -> None: def album_management(api: PyiCloudService) -> None: """Test album management functions""" - album_name = ( - f"{datetime.utcnow().strftime('pyicloud-live-%Y%m%d-%H%M%S')}-{uuid4().hex[:8]}" - ) + album_name = f"{datetime.now(timezone.utc).strftime('pyicloud-live-%Y%m%d-%H%M%S')}-{uuid4().hex[:8]}" renamed_name = f"{album_name}-renamed" print( "Running live photo mutation validation against the authenticated account. " diff --git a/pyicloud/cli/commands/photos.py b/pyicloud/cli/commands/photos.py index 7443b912..dabc7c11 100644 --- a/pyicloud/cli/commands/photos.py +++ b/pyicloud/cli/commands/photos.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from itertools import islice from pathlib import Path from typing import Any, Iterator, Optional @@ -933,27 +934,32 @@ def photos_watch( state.write_json(payloads) return - next_iteration = 1 - watch_payloads = _iter_photo_watch_results( - api=api, - photos=photos, - options=options, - interval_seconds=interval, - iterations=iterations, - ) - while True: + completed_iterations = 0 + while iterations is None or completed_iterations < iterations: + next_iteration = completed_iterations + 1 + if completed_iterations > 0: + _print_photo_watch_wait( + state, + interval_seconds=interval, + next_iteration=next_iteration, + iterations=iterations, + ) + time.sleep(interval) + state.console.print() _print_photo_watch_start( state, iteration=next_iteration, interval_seconds=interval, iterations=iterations, ) - try: - payload = next(watch_payloads) - except StopIteration: - return - if payload["iteration"] > 1: - state.console.print() + sync_result = service_call( + "Photos", + lambda: photos.sync(options), + account_name=api.account_name, + ) + completed_iterations += 1 + payload = normalize_photo_sync_result(sync_result) + payload["iteration"] = completed_iterations if only_print_filenames: if iterations is None or (iterations and iterations > 1): state.console.print(f"run {payload['iteration']}") @@ -965,15 +971,6 @@ def photos_watch( payload, title=f"Photo Watch Run {payload['iteration']}", ) - next_iteration = payload["iteration"] + 1 - if iterations is not None and payload["iteration"] >= iterations: - return - _print_photo_watch_wait( - state, - interval_seconds=interval, - next_iteration=next_iteration, - iterations=iterations, - ) except PhotosServiceException as err: raise CLIAbort(str(err)) from err except KeyboardInterrupt as err: diff --git a/pyicloud/common/cloudkit/client.py b/pyicloud/common/cloudkit/client.py index 7ed10fa3..627d4665 100644 --- a/pyicloud/common/cloudkit/client.py +++ b/pyicloud/common/cloudkit/client.py @@ -84,7 +84,7 @@ def build_url(self, path: str) -> str: def post(self, path: str, payload: Dict, *, headers: Dict | None = None) -> Dict: url = self.build_url(path) - LOGGER.debug("CloudKit POST %s", url) + LOGGER.debug("CloudKit POST %s", path) kwargs = {"json": payload, "timeout": self._REQUEST_TIMEOUT} if headers is not None: kwargs["headers"] = headers @@ -123,13 +123,22 @@ def post(self, path: str, payload: Dict, *, headers: Dict | None = None) -> Dict ) from exc def get_bytes(self, url: str) -> bytes: - LOGGER.debug("CloudKit asset GET %s", url) + LOGGER.debug("CloudKit asset GET ") resp = self._session.get(url, timeout=self._REQUEST_TIMEOUT) code = getattr(resp, "status_code", 0) if not isinstance(code, int): code = 200 if code in (401, 403): raise CloudKitAuthError(f"HTTP {code}: unauthorized") + if code == 429: + retry_after = None + try: + hdr = resp.headers.get("Retry-After") + if hdr: + retry_after = float(hdr) + except Exception: + retry_after = None + raise CloudKitRateLimited("HTTP 429: rate limited", retry_after=retry_after) if code >= 400: raise CloudKitApiError( f"HTTP {code} on asset GET", diff --git a/pyicloud/services/photos_cloudkit/client.py b/pyicloud/services/photos_cloudkit/client.py index 82670a53..debbffbb 100644 --- a/pyicloud/services/photos_cloudkit/client.py +++ b/pyicloud/services/photos_cloudkit/client.py @@ -6,6 +6,8 @@ from typing import Dict from urllib.parse import urlencode +from pydantic import ValidationError + from pyicloud.common.cloudkit import ( CKLookupResponse, CKModifyOperation, @@ -138,7 +140,12 @@ def batch_count(self, *, container_id: str, zone_id: dict[str, str]) -> int: payload, headers={CONTENT_TYPE: CONTENT_TYPE_TEXT}, ) - data = PhotosBatchCountResponse.model_validate(raw_data) + try: + data = PhotosBatchCountResponse.model_validate(raw_data) + except ValidationError as exc: + raise CloudKitApiError( + "Photos count query failed", payload=raw_data + ) from exc try: return data.batch[0].records[0].fields.itemCount.value except Exception as exc: diff --git a/pyicloud/services/photos_cloudkit/mappers.py b/pyicloud/services/photos_cloudkit/mappers.py index b4c137f5..67bbbde7 100644 --- a/pyicloud/services/photos_cloudkit/mappers.py +++ b/pyicloud/services/photos_cloudkit/mappers.py @@ -26,7 +26,10 @@ def decode_encrypted_text(record: CKRecord, field_name: str) -> str | None: if isinstance(value, bytes): raw = value elif isinstance(value, str): - raw = value.encode("ascii") + try: + raw = value.encode("ascii") + except UnicodeEncodeError: + return value else: return None diff --git a/pyicloud/services/photos_cloudkit/materialize.py b/pyicloud/services/photos_cloudkit/materialize.py index 62dbb3b3..f4ba7956 100644 --- a/pyicloud/services/photos_cloudkit/materialize.py +++ b/pyicloud/services/photos_cloudkit/materialize.py @@ -103,7 +103,7 @@ def set_exif_datetime_if_missing(path: Path, taken_at: datetime) -> None: updated = _insert_exif_datetime_segment( jpeg_bytes=data, - timestamp=taken_at.astimezone().strftime("%Y:%m:%d %H:%M:%S"), + timestamp=taken_at.strftime("%Y:%m:%d %H:%M:%S"), ) if updated is None: LOGGER.debug("Failed to update EXIF datetime on %s", path) @@ -413,7 +413,13 @@ def _jpeg_has_exif_datetime(jpeg_bytes: bytes) -> bool: if exif_payload is None: return False - parsed = _parse_tiff_ifd(exif_payload, _read_uint32(exif_payload, 4, b"<")) + byte_order = _tiff_byte_order(exif_payload) + if byte_order is None: + return False + ifd0_offset = _read_uint32(exif_payload, 4, byte_order) + if ifd0_offset is None: + return False + parsed = _parse_tiff_ifd(exif_payload, ifd0_offset) if parsed is None: return False _, ifd0 = parsed @@ -457,6 +463,16 @@ def _extract_exif_payload(jpeg_bytes: bytes) -> bytes | None: return None +def _tiff_byte_order(exif_payload: bytes) -> bytes | None: + if len(exif_payload) < 2: + return None + if exif_payload[:2] == b"II": + return b"<" + if exif_payload[:2] == b"MM": + return b">" + return None + + def _insert_exif_datetime_segment(*, jpeg_bytes: bytes, timestamp: str) -> bytes | None: if len(jpeg_bytes) < 2 or jpeg_bytes[:2] != b"\xff\xd8": return None diff --git a/pyicloud/services/photos_cloudkit/service.py b/pyicloud/services/photos_cloudkit/service.py index fe96617d..704397ad 100644 --- a/pyicloud/services/photos_cloudkit/service.py +++ b/pyicloud/services/photos_cloudkit/service.py @@ -1281,9 +1281,20 @@ def id(self) -> str: @property def fullname(self) -> str: - if self._parent_id is not None: - return f"{self._library.albums[self._parent_id].fullname}/{self.name}" - return self.name + return self._fullname(seen=set()) + + def _fullname(self, *, seen: set[str]) -> str: + if self.id in seen or self._parent_id is None: + return self.name + seen.add(self.id) + parent = self._library.albums.get(self._parent_id) + if parent is None or parent.id in seen: + return self.name + if isinstance(parent, PhotoAlbum): + parent_fullname = parent._fullname(seen=seen) + else: + parent_fullname = parent.fullname + return f"{parent_fullname}/{self.name}" if parent_fullname else self.name def rename(self, value: str) -> None: if self._name == value: @@ -1508,7 +1519,18 @@ def _get_len(self) -> int: headers={CONTENT_TYPE: CONTENT_TYPE_TEXT}, ) response = request.json() - return response["batch"][0]["records"][0]["fields"]["itemCount"]["value"] + try: + return int( + response["batch"][0]["records"][0]["fields"]["itemCount"]["value"] + ) + except (IndexError, KeyError, TypeError, ValueError): + LOGGER.debug( + "Unexpected Photos count response for %s: %r", + self._get_container_id, + response, + exc_info=True, + ) + return 0 return self._client.batch_count( container_id=self._get_container_id, zone_id=self._zone_id, diff --git a/pyicloud/services/photos_cloudkit/sync.py b/pyicloud/services/photos_cloudkit/sync.py index 81788c43..b7dfbb78 100644 --- a/pyicloud/services/photos_cloudkit/sync.py +++ b/pyicloud/services/photos_cloudkit/sync.py @@ -29,6 +29,7 @@ DEFAULT_FOLDER_STRUCTURE = "none" PRIMARY_SYNC_VERSIONS = {"original", "medium", "thumb"} LIVE_PHOTO_SYNC_VERSIONS = {"original", "medium", "thumb"} +UNSAFE_PATH_COMPONENT_CHARS = re.compile(r"[\x00-\x1f/\\:]+") _LOGGER = logging.getLogger(__name__) @@ -212,8 +213,13 @@ def run_photo_sync(service: Any, options: PhotoSyncOptions) -> PhotoSyncResult: and options.keep_icloud_recent_days < 0 ): raise PhotosServiceException("--keep-icloud-recent-days must be at least 0.") + if options.keep_icloud_recent_days is not None and options.until_found is not None: + raise PhotosServiceException( + "--keep-icloud-recent-days cannot be combined with --until-found." + ) - options.directory.mkdir(parents=True, exist_ok=True) + if not options.dry_run and not options.only_print_filenames: + options.directory.mkdir(parents=True, exist_ok=True) result = PhotoSyncResult( directory=str(options.directory), state_path=str(options.state_path()), @@ -282,7 +288,29 @@ def run_photo_sync(service: Any, options: PhotoSyncOptions) -> PhotoSyncResult: reserved_paths.add(relative_path) current_entries.add((asset.id, resource_key)) asset_paths.append(relative_path) - target_path = options.directory / relative_path + try: + target_path = _safe_target_path(options.directory, relative_path) + except PhotosServiceException as exc: + _LOGGER.warning( + "Skipping resource with unsafe path '%s' under '%s': %s", + relative_path, + options.directory, + exc, + ) + result.items.append( + PhotoSyncItem( + asset_id=asset.id, + resource_key=resource_key, + path=relative_path, + action="skipped", + reason="unsafe-path", + ) + ) + result.skipped_count += 1 + asset_ready_for_delete = False + consecutive_seen = 0 + sync_complete = False + continue manifest = state.get_resource(asset.id, resource_key) if _is_current_file(target_path, manifest, resource, relative_path): result.items.append( @@ -409,7 +437,28 @@ def run_photo_sync(service: Any, options: PhotoSyncOptions) -> PhotoSyncResult: key = (stale.asset_id, stale.resource_key) if key in current_entries: continue - stale_path = options.directory / stale.relative_path + try: + stale_path = _safe_target_path( + options.directory, stale.relative_path + ) + except PhotosServiceException as exc: + _LOGGER.warning( + "Ignoring unsafe stale local photo path '%s': %s", + stale.relative_path, + exc, + ) + state.delete_resource(stale.asset_id, stale.resource_key) + result.items.append( + PhotoSyncItem( + asset_id=stale.asset_id, + resource_key=stale.resource_key, + path=stale.relative_path, + action="skipped", + reason="unsafe-path", + ) + ) + result.skipped_count += 1 + continue if stale_path.exists(): try: stale_path.unlink() @@ -487,8 +536,18 @@ def _can_short_circuit( if state.resource_count() == 0: return False for entry in state.iter_resources(): - if not (directory / entry.relative_path).exists(): + try: + path = _safe_target_path(directory, entry.relative_path) + except PhotosServiceException: return False + if not path.exists(): + return False + if entry.size is not None: + try: + if path.stat().st_size != entry.size: + return False + except OSError: + return False return True @@ -594,8 +653,9 @@ def _resolve_resource( def _render_relative_path( asset: Any, resource: PhotoResource, folder_structure: str ) -> str: + filename = _safe_resource_filename(resource) if folder_structure == "none": - return resource.filename + return filename asset_date = _asset_datetime(asset, "asset_date") or datetime.fromtimestamp( 0, timezone.utc ) @@ -608,10 +668,10 @@ def _render_relative_path( raise PhotosServiceException( f"Invalid folder structure format '{folder_structure}'." ) from exc - folder = folder.strip().strip("/") + folder = _safe_relative_folder(folder) if not folder: - return resource.filename - relative = PurePosixPath(folder) / resource.filename + return filename + relative = PurePosixPath(folder) / filename return relative.as_posix() @@ -628,10 +688,10 @@ def _unique_relative_path( owner is None or owner == (asset_id, resource_key) ): return candidate - path = Path(candidate) + path = PurePosixPath(candidate) stem = path.stem suffix = path.suffix - directory = Path(candidate).parent + directory = path.parent discriminator = asset_id[:8] index = 1 while True: @@ -645,6 +705,42 @@ def _unique_relative_path( index += 1 +def _safe_resource_filename(resource: PhotoResource) -> str: + filename = str(getattr(resource, "filename", "") or "photo") + filename = PurePosixPath(filename.replace("\\", "/")).name + return _sanitize_path_component(filename, fallback="photo") + + +def _safe_relative_folder(folder: str) -> str: + parts: list[str] = [] + for part in PurePosixPath(folder.replace("\\", "/")).parts: + if part in {"", ".", "..", "/"}: + continue + parts.append(_sanitize_path_component(part, fallback="folder")) + if not parts: + return "" + return PurePosixPath(*parts).as_posix() + + +def _sanitize_path_component(value: str, *, fallback: str) -> str: + sanitized = UNSAFE_PATH_COMPONENT_CHARS.sub("-", value).strip() + if sanitized in {"", ".", ".."}: + return fallback + return sanitized + + +def _safe_target_path(directory: Path, relative_path: str) -> Path: + base = directory.resolve(strict=False) + target = (base / relative_path).resolve(strict=False) + try: + target.relative_to(base) + except ValueError as exc: + raise PhotosServiceException( + f"Refusing to access path outside sync directory: {relative_path}" + ) from exc + return target + + def _is_current_file( path: Path, manifest: SyncedPhotoResource | None, diff --git a/pyicloud/services/photos_legacy.py b/pyicloud/services/photos_legacy.py index 5ab7a9e0..7c1cc1ba 100644 --- a/pyicloud/services/photos_legacy.py +++ b/pyicloud/services/photos_legacy.py @@ -728,7 +728,10 @@ def libraries(self) -> dict[str, BasePhotoLibrary]: url: str = f"{self.service_endpoint}/changes/database" request: Response = self.session.post( - url, data="{}", headers={CONTENT_TYPE: CONTENT_TYPE_TEXT} + url, + params=self.params, + data="{}", + headers={CONTENT_TYPE: CONTENT_TYPE_TEXT}, ) response: dict[str, Any] = request.json() zones: list[dict[str, Any]] = response["zones"] @@ -1032,10 +1035,20 @@ def id(self) -> str: @property def fullname(self) -> str: - if self._parent_id is not None: - return f"{self._library.albums[self._parent_id].fullname}/{self.name}" - - return self.name + parts: list[str] = [self.name] + visited: set[str] = {self.id} + parent_id = self._parent_id + while parent_id is not None: + if parent_id in visited: + _LOGGER.warning("Cycle detected in album parent chain for %s", self.id) + break + visited.add(parent_id) + parent = self._library.albums.get(parent_id) + if parent is None: + break + parts.append(parent.name) + parent_id = getattr(parent, "_parent_id", None) + return "/".join(reversed(parts)) def rename(self, value: str) -> None: """Renames the album.""" @@ -1253,7 +1266,18 @@ def _get_len(self) -> int: ) response: dict[str, Any] = request.json() - return response["batch"][0]["records"][0]["fields"]["itemCount"]["value"] + try: + return int( + response["batch"][0]["records"][0]["fields"]["itemCount"]["value"] + ) + except (IndexError, KeyError, TypeError, ValueError): + _LOGGER.debug( + "Unexpected Photos count response for %s: %r", + self._get_container_id, + response, + exc_info=True, + ) + return 0 def _get_url(self) -> str: return self._url diff --git a/tests/fixtures/photos_browser_mutations/README.md b/tests/fixtures/photos_browser_mutations/README.md index 0c8fc57a..d3fd7adc 100644 --- a/tests/fixtures/photos_browser_mutations/README.md +++ b/tests/fixtures/photos_browser_mutations/README.md @@ -6,7 +6,7 @@ See also the top-level fixture guide in for how these files relate to the broader Photos protocol fixture set. They intentionally exclude raw HAR files, binary responses, cookies, and account -identifiers. Stable placeholder values are used instead so request and response +identifiers. Stable placeholder values are used instead, so request and response relationships remain testable without exposing personal data. The fixture set covers: diff --git a/tests/services/test_photos.py b/tests/services/test_photos.py index e1f6ed7e..bd7da85f 100644 --- a/tests/services/test_photos.py +++ b/tests/services/test_photos.py @@ -50,6 +50,7 @@ SmartPhotoAlbum, ) from pyicloud.services.photos_cloudkit.mappers import ( + decode_encrypted_text, record_change_tag, record_field_value, ) @@ -119,6 +120,19 @@ encoding="utf-8" ) ) + + +def test_decode_encrypted_text_returns_predecoded_unicode() -> None: + """Already-decoded non-ASCII text should not fail ASCII coercion.""" + + record = { + "recordName": "asset-1", + "fields": {"captionEnc": {"value": "\u2603"}}, + } + + assert decode_encrypted_text(record, "captionEnc") == "\u2603" + + SHARED_LIBRARY_UNFAVORITE_REQUEST = json.loads( (FIXTURE_DIR / "photos_shared_library_unfavorite_request.json").read_text( encoding="utf-8" @@ -2245,6 +2259,7 @@ def test_legacy_photos_service_libraries_propagate_upload_url() -> None: libraries = service.libraries + assert session.post.call_args_list[0].kwargs["params"] == service.params assert libraries["root"]._upload_url == "https://upload.example.com" assert libraries["CustomZone"]._upload_url == "https://upload.example.com" @@ -2434,10 +2449,11 @@ def test_photo_album_fullname_with_parent() -> None: """Tests fullname property when album has a parent.""" mock_photo_library: MagicMock = MagicMock(spec=PhotoLibrary) parent_album = MagicMock() + parent_album.id = "parent123" parent_album.fullname = "Parent Album" mock_albums = MagicMock() - mock_albums.__getitem__.return_value = parent_album + mock_albums.get.return_value = parent_album mock_photo_library.albums = mock_albums album = PhotoAlbum( @@ -2452,7 +2468,56 @@ def test_photo_album_fullname_with_parent() -> None: ) assert album.fullname == "Parent Album/Child Album" - mock_albums.__getitem__.assert_called_once_with("parent123") + mock_albums.get.assert_called_once_with("parent123") + + +def test_photo_album_fullname_missing_parent_falls_back_to_name() -> None: + """Malformed parent references should not make album names unreadable.""" + + mock_photo_library: MagicMock = MagicMock(spec=PhotoLibrary) + mock_photo_library.albums = AlbumContainer() + + album = PhotoAlbum( + library=mock_photo_library, + name="Child Album", + record_id="album123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="missing-parent", + ) + + assert album.fullname == "Child Album" + + +def test_photo_album_fullname_cycle_falls_back_without_recursing() -> None: + """Malformed cyclic parent graphs should not recurse forever.""" + + mock_photo_library: MagicMock = MagicMock(spec=PhotoLibrary) + parent_album = PhotoAlbum( + library=mock_photo_library, + name="Parent Album", + record_id="parent123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="child123", + ) + child_album = PhotoAlbum( + library=mock_photo_library, + name="Child Album", + record_id="child123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="parent123", + ) + mock_photo_library.albums = AlbumContainer([parent_album, child_album]) + + assert child_album.fullname == "Parent Album/Child Album" def test_photo_album_rename_success(mock_photos_service: MagicMock) -> None: @@ -2625,6 +2690,80 @@ def test_legacy_photo_album_rename_raises_for_error_payload() -> None: assert album._record_change_tag == "tag123" +def test_legacy_photo_album_fullname_missing_parent_falls_back_to_name() -> None: + """Malformed legacy parent references should not raise KeyError.""" + + mock_photo_library = MagicMock(spec=LegacyPhotoLibrary) + mock_photo_library.albums = LegacyAlbumContainer() + + album = LegacyPhotoAlbum( + library=mock_photo_library, + name="Child Album", + record_id="child123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="missing-parent", + ) + + assert album.fullname == "Child Album" + + +def test_legacy_photo_album_fullname_cycle_falls_back_without_recursing() -> None: + """Malformed legacy cyclic parent graphs should not recurse forever.""" + + mock_photo_library = MagicMock(spec=LegacyPhotoLibrary) + parent_album = LegacyPhotoAlbum( + library=mock_photo_library, + name="Parent Album", + record_id="parent123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="child123", + ) + child_album = LegacyPhotoAlbum( + library=mock_photo_library, + name="Child Album", + record_id="child123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + parent_id="parent123", + ) + mock_photo_library.albums = LegacyAlbumContainer([parent_album, child_album]) + + assert child_album.fullname == "Parent Album/Child Album" + + +def test_legacy_photo_album_get_len_returns_zero_for_malformed_response() -> None: + """Malformed legacy count payloads should not abort album iteration.""" + + mock_photo_library = MagicMock(spec=LegacyPhotoLibrary) + mock_photo_library.service = MagicMock() + mock_photo_library.service.service_endpoint = "https://example.com/endpoint" + mock_photo_library.service.params = {"dsid": "12345"} + mock_photo_library.service.session.post.return_value = MagicMock( + json=MagicMock(return_value={"batch": []}) + ) + + album = LegacyPhotoAlbum( + library=mock_photo_library, + name="Test Album", + record_id="album123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + zone_id={"zoneName": "TestZone"}, + ) + + assert album._get_len() == 0 + + def test_photo_album_delete_success(mock_photo_library: MagicMock) -> None: """Tests successful album deletion.""" mock_photo_library.service.session.post.return_value = MagicMock() @@ -3068,6 +3207,31 @@ def test_photo_album_get_len(mock_photo_library: MagicMock) -> None: ) +def test_photo_album_get_len_returns_zero_for_malformed_legacy_response( + mock_photo_library: MagicMock, +) -> None: + """Malformed legacy count payloads should not escape as indexing errors.""" + + mock_photo_library.service.session.post.return_value.json.return_value = { + "batch": [] + } + mock_photo_library.service.service_endpoint = "https://example.com/endpoint" + mock_photo_library.service.params = {"dsid": "12345"} + + album = PhotoAlbum( + library=mock_photo_library, + name="Test Album", + record_id="album123", + obj_type=ObjectTypeEnum.CONTAINER, + list_type=ListTypeEnum.CONTAINER, + direction=DirectionEnum.ASCENDING, + url="https://example.com/records/query?dsid=12345", + zone_id={"zoneName": "TestZone"}, + ) + + assert album._get_len() == 0 + + def test_photo_album_get_payload(mock_photo_library: MagicMock) -> None: """Tests _get_payload method.""" album = PhotoAlbum( diff --git a/tests/services/test_photos_cloudkit_client.py b/tests/services/test_photos_cloudkit_client.py index f55859a8..f2594cfb 100644 --- a/tests/services/test_photos_cloudkit_client.py +++ b/tests/services/test_photos_cloudkit_client.py @@ -3,12 +3,13 @@ from __future__ import annotations import json +import logging from pathlib import Path from unittest.mock import MagicMock, mock_open, patch import pytest -from pyicloud.common.cloudkit.client import CloudKitApiError +from pyicloud.common.cloudkit.client import CloudKitApiError, CloudKitRateLimited from pyicloud.const import CONTENT_TYPE, CONTENT_TYPE_TEXT from pyicloud.services.photos_cloudkit.client import PhotosCloudKitClient @@ -200,6 +201,38 @@ def test_batch_count_posts_expected_internal_query_payload() -> None: assert payload["batch"][0]["zoneID"] == {"zoneName": "PrimarySync"} +def test_batch_count_debug_log_omits_cloudkit_query_params(caplog) -> None: + """CloudKit request logs should avoid user-identifying query parameters.""" + + session = MagicMock() + session.post.return_value = MagicMock( + json=lambda: { + "batch": [ + { + "records": [ + {"fields": {"itemCount": {"value": 42}}}, + ] + } + ] + } + ) + client = PhotosCloudKitClient( + base_url="https://example.com/database/1/container/production/private", + session=session, + base_params={"dsid": "12345"}, + ) + caplog.set_level(logging.DEBUG, logger="pyicloud.common.cloudkit.client") + + client.batch_count( + container_id="CPLContainerRelationLiveByPosition:album123", + zone_id={"zoneName": "PrimarySync"}, + ) + + messages = "\n".join(record.getMessage() for record in caplog.records) + assert "CloudKit POST /internal/records/query/batch" in messages + assert "dsid=12345" not in messages + + def test_batch_count_raises_on_malformed_payload() -> None: """Malformed count responses should be surfaced as CloudKitApiError.""" @@ -218,6 +251,77 @@ def test_batch_count_raises_on_malformed_payload() -> None: ) +def test_batch_count_raises_cloudkit_error_for_validation_failure() -> None: + """Invalid count response models should be normalized into CloudKitApiError.""" + + session = MagicMock() + session.post.return_value = MagicMock( + json=lambda: { + "batch": [ + { + "records": [ + {"fields": {"itemCount": {"value": "not-an-int"}}}, + ] + } + ] + } + ) + client = PhotosCloudKitClient( + base_url="https://example.com/database/1/container/production/private", + session=session, + base_params={"dsid": "12345"}, + ) + + with pytest.raises(CloudKitApiError, match="Photos count query failed"): + client.batch_count( + container_id="CPLContainerRelationLiveByPosition:album123", + zone_id={"zoneName": "PrimarySync"}, + ) + + +def test_download_asset_bytes_preserves_rate_limit_retry_after() -> None: + """Asset GET rate limits should expose Retry-After like CloudKit POSTs.""" + + session = MagicMock() + session.get.return_value = MagicMock( + status_code=429, + headers={"Retry-After": "2.5"}, + text="rate limited", + ) + client = PhotosCloudKitClient( + base_url="https://example.com/database/1/container/production/private", + session=session, + base_params={"dsid": "12345"}, + ) + + with pytest.raises(CloudKitRateLimited) as exc_info: + client.download_asset_bytes("https://example.com/asset") + + assert exc_info.value.retry_after == 2.5 + + +def test_download_asset_bytes_redacts_signed_url_in_debug_log(caplog) -> None: + """Asset GET logs should not include signed download URLs.""" + + session = MagicMock() + session.get.return_value = MagicMock(status_code=200, content=b"asset") + client = PhotosCloudKitClient( + base_url="https://example.com/database/1/container/production/private", + session=session, + base_params={"dsid": "12345"}, + ) + signed_url = "https://cvws.icloud-content.com/asset?dsid=12345&token=secret" + caplog.set_level(logging.DEBUG, logger="pyicloud.common.cloudkit.client") + + assert client.download_asset_bytes(signed_url) == b"asset" + + messages = "\n".join(record.getMessage() for record in caplog.records) + assert "CloudKit asset GET " in messages + assert signed_url not in messages + assert "dsid=12345" not in messages + assert "token=secret" not in messages + + def test_batch_count_raises_cloudkit_error_for_http_error() -> None: """Batch count queries should use shared CloudKit HTTP error handling.""" diff --git a/tests/services/test_photos_sync.py b/tests/services/test_photos_sync.py index bb8b1fac..0493b0e8 100644 --- a/tests/services/test_photos_sync.py +++ b/tests/services/test_photos_sync.py @@ -3,6 +3,7 @@ from __future__ import annotations import base64 +import struct import tempfile from datetime import datetime, timedelta, timezone from pathlib import Path @@ -10,16 +11,21 @@ from typing import Optional from unittest.mock import patch +import pytest + from pyicloud.services.photos import ( PhotoResource, + PhotosServiceException, PhotoSyncOptions, run_photo_sync, watch_photo_sync, ) +from pyicloud.services.photos_cloudkit import materialize as materialize_module from pyicloud.services.photos_cloudkit import sync as sync_module from pyicloud.services.photos_cloudkit.state import ( MemoryPhotoSyncState, SQLitePhotoSyncState, + SyncedPhotoResource, create_photo_sync_state, ) @@ -132,7 +138,7 @@ def __init__( key="original", filename=filename, url=f"https://example.com/{asset_id}/original", - size=32, + size=len(f"{asset_id}:original".encode()), type="public.jpeg", checksum=f"checksum-{asset_id}", ) @@ -227,6 +233,80 @@ def test_run_photo_sync_downloads_and_persists_manifest() -> None: temp_dir.rmdir() +def test_run_photo_sync_sanitizes_remote_filenames() -> None: + """Remote filenames must not escape the configured output directory.""" + + asset = DummyAsset("asset-escape", "../escape.jpg") + service = DummyService(DummyAlbum("All Photos", [asset]), cursor="cursor-escape") + + temp_dir = Path(tempfile.mkdtemp(prefix="photos-sync-escape-", dir=TEST_BASE)) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + result = run_photo_sync( + service, + PhotoSyncOptions(directory=output_dir, state_dir=state_dir), + ) + + assert result.downloaded_count == 1 + assert (output_dir / "escape.jpg").read_bytes() == b"asset-escape:original" + assert not (temp_dir / "escape.jpg").exists() + with SQLitePhotoSyncState(Path(result.state_path)) as state: + manifest = state.get_resource("asset-escape", "original") + assert manifest is not None + assert manifest.relative_path == "escape.jpg" + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + +def test_run_photo_sync_sanitizes_folder_structure_paths() -> None: + """Folder structure output should stay under the configured directory.""" + + asset = DummyAsset( + "asset-folder-escape", + "photo.jpg", + asset_date=datetime(2026, 4, 21, tzinfo=timezone.utc), + ) + service = DummyService( + DummyAlbum("All Photos", [asset]), cursor="cursor-folder-escape" + ) + + temp_dir = Path( + tempfile.mkdtemp(prefix="photos-sync-folder-escape-", dir=TEST_BASE) + ) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + result = run_photo_sync( + service, + PhotoSyncOptions( + directory=output_dir, + state_dir=state_dir, + folder_structure="../{:%Y}/../../nested", + ), + ) + + assert result.downloaded_count == 1 + assert (output_dir / "2026" / "nested" / "photo.jpg").exists() + assert not (temp_dir / "nested" / "photo.jpg").exists() + with SQLitePhotoSyncState(Path(result.state_path)) as state: + manifest = state.get_resource("asset-folder-escape", "original") + assert manifest is not None + assert manifest.relative_path == "2026/nested/photo.jpg" + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + def test_run_photo_sync_auto_delete_removes_stale_files() -> None: """Auto-delete should remove previously tracked files absent from the latest run.""" @@ -268,6 +348,110 @@ def test_run_photo_sync_auto_delete_removes_stale_files() -> None: temp_dir.rmdir() +def test_run_photo_sync_auto_delete_ignores_unsafe_stale_paths() -> None: + """Auto-delete must not remove paths outside the configured output directory.""" + + service = DummyService(DummyAlbum("All Photos", []), cursor="cursor-clean") + + temp_dir = Path( + tempfile.mkdtemp(prefix="photos-sync-unsafe-delete-", dir=TEST_BASE) + ) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + output_dir.mkdir() + outside_path = temp_dir / "outside.jpg" + outside_path.write_bytes(b"keep") + + options = PhotoSyncOptions(directory=output_dir, state_dir=state_dir) + with SQLitePhotoSyncState(options.state_path()) as state: + state.upsert_resource( + SyncedPhotoResource( + asset_id="asset-stale", + resource_key="original", + relative_path="../outside.jpg", + size=None, + checksum=None, + downloaded_at="2026-04-21T00:00:00+00:00", + ) + ) + + result = run_photo_sync( + service, + PhotoSyncOptions( + directory=output_dir, + state_dir=state_dir, + auto_delete=True, + ), + ) + + assert outside_path.read_bytes() == b"keep" + assert result.deleted_count == 0 + assert result.skipped_count == 1 + assert result.items[0].reason == "unsafe-path" + with SQLitePhotoSyncState(Path(result.state_path)) as state: + assert state.get_resource("asset-stale", "original") is None + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + +def test_run_photo_sync_skips_resource_when_safe_target_path_rejects() -> None: + """Unexpected path validation failures should skip the resource and continue.""" + + service = DummyService( + DummyAlbum( + "All Photos", + [ + DummyAsset("asset-unsafe", "unsafe.jpg"), + DummyAsset("asset-safe", "safe.jpg"), + ], + ), + cursor="cursor-path-validation", + ) + + temp_dir = Path(tempfile.mkdtemp(prefix="photos-sync-path-reject-", dir=TEST_BASE)) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + original_safe_target_path = sync_module._safe_target_path + + def reject_one_path(directory: Path, relative_path: str) -> Path: + if relative_path == "unsafe.jpg": + raise PhotosServiceException("unsafe test path") + return original_safe_target_path(directory, relative_path) + + with patch( + "pyicloud.services.photos_cloudkit.sync._safe_target_path", + side_effect=reject_one_path, + ): + result = run_photo_sync( + service, + PhotoSyncOptions(directory=output_dir, state_dir=state_dir), + ) + + assert result.skipped_count == 1 + assert result.downloaded_count == 1 + assert any(item.reason == "unsafe-path" for item in result.items) + assert not (output_dir / "unsafe.jpg").exists() + assert (output_dir / "safe.jpg").exists() + with SQLitePhotoSyncState(Path(result.state_path)) as state: + assert state.get_sync_cursor() is None + assert state.get_resource("asset-unsafe", "original") is None + assert state.get_resource("asset-safe", "original") is not None + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + def test_run_photo_sync_auto_delete_continues_when_unlink_fails() -> None: """Auto-delete should skip locked files without corrupting sync state.""" @@ -328,7 +512,7 @@ def flaky_unlink(path_obj: Path, *args, **kwargs) -> None: def test_run_photo_sync_dry_run_does_not_create_state() -> None: - """Preview-only sync runs should avoid creating a new SQLite state file.""" + """Preview-only sync runs should avoid creating output directories or state.""" service = DummyService( DummyAlbum("All Photos", [DummyAsset("asset-1", "preview.jpg")]), @@ -345,6 +529,7 @@ def test_run_photo_sync_dry_run_does_not_create_state() -> None: ) assert result.listed_count == 1 + assert not output_dir.exists() assert not (output_dir / "preview.jpg").exists() assert not Path(result.state_path).exists() finally: @@ -356,6 +541,63 @@ def test_run_photo_sync_dry_run_does_not_create_state() -> None: temp_dir.rmdir() +def test_run_photo_sync_only_print_does_not_create_output_or_state() -> None: + """Filename-only previews should leave the filesystem untouched.""" + + service = DummyService( + DummyAlbum("All Photos", [DummyAsset("asset-1", "preview.jpg")]), + cursor="cursor-print-preview", + ) + + temp_dir = Path( + tempfile.mkdtemp(prefix="photos-sync-print-preview-", dir=TEST_BASE) + ) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + result = run_photo_sync( + service, + PhotoSyncOptions( + directory=output_dir, + state_dir=state_dir, + only_print_filenames=True, + ), + ) + + assert result.listed_count == 1 + assert not output_dir.exists() + assert not Path(result.state_path).exists() + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + +def test_run_photo_sync_rejects_remote_delete_with_until_found() -> None: + """Remote deletion must not combine with partial until-found scans.""" + + service = DummyService( + DummyAlbum("All Photos", [DummyAsset("asset-1", "photo.jpg")]), + cursor="cursor-until-delete", + ) + + with pytest.raises( + PhotosServiceException, + match="--keep-icloud-recent-days cannot be combined with --until-found", + ): + run_photo_sync( + service, + PhotoSyncOptions( + directory=Path("/tmp/unused"), + keep_icloud_recent_days=0, + until_found=1, + ), + ) + + def test_run_photo_sync_live_photos_respect_video_flags() -> None: """Live photo sync should fetch both resources unless video downloads are skipped.""" @@ -448,6 +690,53 @@ def test_watch_photo_sync_repeats_runs_and_sleeps_between_iterations() -> None: temp_dir.rmdir() +def test_run_photo_sync_short_circuit_validates_tracked_file_size() -> None: + """A matching cursor should not hide a truncated tracked local file.""" + + payload = b"complete-image-bytes" + asset = DummyAsset( + "asset-sized", + "photo.jpg", + payloads={"original": payload}, + resources={ + "original": PhotoResource( + key="original", + filename="photo.jpg", + url="https://example.com/sized/original", + size=len(payload), + type="public.jpeg", + ) + }, + ) + service = DummyService(DummyAlbum("All Photos", [asset]), cursor="cursor-sized") + + temp_dir = Path(tempfile.mkdtemp(prefix="photos-sync-sized-", dir=TEST_BASE)) + try: + output_dir = temp_dir / "output" + state_dir = temp_dir / "state" + first = run_photo_sync( + service, + PhotoSyncOptions(directory=output_dir, state_dir=state_dir), + ) + (output_dir / "photo.jpg").write_bytes(b"bad") + second = run_photo_sync( + service, + PhotoSyncOptions(directory=output_dir, state_dir=state_dir), + ) + + assert first.downloaded_count == 1 + assert second.short_circuited is False + assert second.downloaded_count == 1 + assert (output_dir / "photo.jpg").read_bytes() == payload + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + def test_run_photo_sync_align_raw_prefers_requested_representation() -> None: """RAW alignment should swap original and alternative resources when requested.""" @@ -553,6 +842,7 @@ def test_run_photo_sync_sets_exif_datetime_for_jpegs_without_exif() -> None: asset = DummyAsset( "asset-exif", "photo.jpg", + asset_date=datetime(2026, 4, 21, tzinfo=timezone.utc), payloads={"original": MINIMAL_JPEG}, ) service = DummyService(DummyAlbum("All Photos", [asset]), cursor="cursor-exif") @@ -584,6 +874,54 @@ def test_run_photo_sync_sets_exif_datetime_for_jpegs_without_exif() -> None: temp_dir.rmdir() +def test_set_exif_datetime_preserves_asset_wall_clock_timezone() -> None: + """EXIF insertion should not convert asset timestamps to the local timezone.""" + + temp_dir = Path(tempfile.mkdtemp(prefix="photos-sync-exif-tz-", dir=TEST_BASE)) + try: + path = temp_dir / "photo.jpg" + path.write_bytes(MINIMAL_JPEG) + taken_at = datetime( + 2026, + 1, + 1, + 23, + 30, + tzinfo=timezone(timedelta(hours=-5)), + ) + + materialize_module.set_exif_datetime_if_missing(path, taken_at) + + assert b"2026:01:01 23:30:00" in path.read_bytes() + finally: + for path in sorted(temp_dir.rglob("*"), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + temp_dir.rmdir() + + +def test_jpeg_has_exif_datetime_reads_big_endian_ifd_offsets() -> None: + """Big-endian EXIF payloads should be inspected without inserting duplicates.""" + + timestamp = b"2026:04:21 12:34:56\x00" + tiff = ( + b"MM" + + struct.pack(">H", 42) + + struct.pack(">I", 8) + + struct.pack(">H", 1) + + struct.pack(">HHII", 0x0132, 2, len(timestamp), 26) + + struct.pack(">I", 0) + + timestamp + ) + payload = b"Exif\x00\x00" + tiff + segment = b"\xff\xe1" + struct.pack(">H", len(payload) + 2) + payload + jpeg_bytes = b"\xff\xd8" + segment + b"\xff\xd9" + + assert materialize_module._jpeg_has_exif_datetime(jpeg_bytes) is True + + def test_apply_local_metadata_skips_mutations_for_preview_modes() -> None: """Dry-run and filename-only previews must not mutate existing local files.""" @@ -666,6 +1004,8 @@ def test_run_photo_sync_recent_uses_asset_date_when_added_date_missing() -> None asset_date=datetime.now(timezone.utc) - timedelta(days=10), added_date=None, ) + recent_asset.added_date = None + old_asset.added_date = None service = DummyService( DummyAlbum("All Photos", [recent_asset, old_asset]), cursor="cursor-recent-fallback", diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 3f68134a..971b0c8b 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -152,7 +152,7 @@ def __init__(self, photo_id: str, filename: str) -> None: self.created = datetime(2026, 3, 1, tzinfo=timezone.utc) self.asset_date = self.created self.added_date = self.created - self.size = 1234 + self.size = len(f"{photo_id}:original".encode()) self.dimensions = (1920, 1080) self.is_live_photo = False self.resources = { @@ -3398,24 +3398,29 @@ def test_photos_watch_command_reports_progress_in_text_mode() -> None: output_dir = TEST_ROOT / "photos-watch-progress-output" state_dir = TEST_ROOT / "photos-watch-progress-state" - result = _invoke( - fake_api, - "photos", - "watch", - "--directory", - str(output_dir), - "--state-dir", - str(state_dir), - "--interval", - "1", - "--iterations", - "2", - ) + with patch("pyicloud.cli.commands.photos.time.sleep") as sleep: + result = _invoke( + fake_api, + "photos", + "watch", + "--directory", + str(output_dir), + "--state-dir", + str(state_dir), + "--interval", + "1", + "--iterations", + "2", + ) assert result.exit_code == 0 assert "Starting photo watch run 1 of 2" in result.stdout assert "Waiting 1s before photo watch run 2 of 2" in result.stdout assert "Starting photo watch run 2 of 2" in result.stdout + assert result.stdout.index( + "Waiting 1s before photo watch run 2 of 2" + ) < result.stdout.index("Starting photo watch run 2 of 2") + sleep.assert_called_once_with(1) assert "Photo Watch Run 1" in result.stdout assert "Photo Watch Run 2" in result.stdout