SpecLad
diff --git a/‎changelog.d/20251114_144533_roman_heavyweight_backup_sparse.md‎
Lines changed: 8 additions & 0 deletions b/‎changelog.d/20251114_144533_roman_heavyweight_backup_sparse.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎cvat/apps/engine/backup.py‎
Lines changed: 49 additions & 1 deletion b/‎cvat/apps/engine/backup.py‎
Lines changed: 49 additions & 1 deletion
diff --git a/‎tests/docker-compose.minio.yml‎
Lines changed: 2 additions & 2 deletions b/‎tests/docker-compose.minio.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/mounted_file_share/images/image_0.jpg‎
7.88 KB b/‎tests/mounted_file_share/images/image_0.jpg‎
7.88 KB
diff --git a/‎tests/mounted_file_share/images/image_4.jpg‎
7.88 KB b/‎tests/mounted_file_share/images/image_4.jpg‎
7.88 KB
diff --git a/‎tests/mounted_file_share/images/image_5.jpg‎
7.88 KB b/‎tests/mounted_file_share/images/image_5.jpg‎
7.88 KB
diff --git a/‎tests/python/rest_api/test_tasks.py‎
Lines changed: 18 additions & 8 deletions b/‎tests/python/rest_api/test_tasks.py‎
Lines changed: 18 additions & 8 deletions
@@ -0,0 +1,8 @@
+### Fixed
+
+- Heavyweight backups created from tasks using cloud storage that have
+  images as frames and non-default start frame, stop frame or frame step
+  settings no longer fail to import. Note that the fix is for backup
+  creation; as such, CVAT will still not be able to import backups of
+  such tasks created by previous versions
+  (<https://github.com/cvat-ai/cvat/pull/10004>)
@@ -423,6 +423,7 @@ def __init__(self, pk, version=Version.V1, *, lightweight: bool):
         )
         self._label_mapping = _get_label_mapping(db_labels)
         self._lightweight = lightweight
+        self._manifest_was_filtered = False
 
     def _write_annotation_guide(self, zip_object: ZipFile, target_dir: str) -> None:
         annotation_guide = (
@@ -436,6 +437,46 @@ def _write_annotation_guide(self, zip_object: ZipFile, target_dir: str) -> None:
             target_dir=target_dir,
         )
 
+    def _write_filtered_media_manifest(self, zip_object: ZipFile, target_dir: str) -> None:
+        # When making a heavyweight backup of a task with images, we only include those frames
+        # that match the task's frame range. This function filters the manifest so that it also
+        # includes only those frames. That way, we don't have a manifest referencing nonexistent
+        # images in the backup. The manifest is written into zip_object under target_dir/DATA_DIRNAME.
+
+        target_data_dir = os.path.join(target_dir, self.DATA_DIRNAME)  # data subdirectory of the backup
+
+        if hasattr(self._db_data, "video"):
+            # No filtering necessary; just use the original manifest. (Presumably this branch means the data is video-backed; confirm against the data model.)
+            self._write_files(
+                source_dir=self._db_data.get_upload_dirname(),
+                zip_object=zip_object,
+                files=[self._db_data.get_manifest_path()],
+                target_dir=target_data_dir,
+            )
+            return  # _manifest_was_filtered is not touched on this path
+
+        with tempfile.TemporaryDirectory() as tmp_dir:  # the filtered manifest only needs to exist until it is zipped
+            present_frame_nums = {im.frame for im in self._db_data.images.all()}  # frame numbers of the image records attached to this data
+
+            filtered_manifest_path = os.path.join(tmp_dir, self.MEDIA_MANIFEST_FILENAME)
+
+            imm_original = ImageManifestManager(
+                self._db_data.get_manifest_path(), create_index=False
+            )
+            imm_filtered = ImageManifestManager(filtered_manifest_path, create_index=False)
+            imm_filtered.create(  # keep only the entries whose frame number has an image record
+                entry for frame_num, entry in imm_original if frame_num in present_frame_nums
+            )
+
+            self._write_files(  # must run inside the `with`, before tmp_dir is removed
+                source_dir=tmp_dir,
+                zip_object=zip_object,
+                files=[filtered_manifest_path],
+                target_dir=target_data_dir,
+            )
+
+            self._manifest_was_filtered = True  # serialize_data() checks this flag to drop start_frame/stop_frame/frame_filter
+
     def _write_data(self, zip_object: ZipFile, target_dir: str) -> None:
         target_data_dir = os.path.join(target_dir, self.DATA_DIRNAME)
 
@@ -480,7 +521,9 @@ def _write_data(self, zip_object: ZipFile, target_dir: str) -> None:
                     target_dir=target_data_dir,
                 )
             else:
-                files_for_local_copy = [self._db_data.get_manifest_path()]
+                self._write_filtered_media_manifest(zip_object=zip_object, target_dir=target_dir)
+
+                files_for_local_copy = []
 
                 media_files_to_download = []
                 for media_file in self._db_data.related_files.all():
@@ -669,6 +712,11 @@ def serialize_data():
             else:
                 data["storage"] = self._db_data.storage
 
+            if self._manifest_was_filtered:
+                del data["start_frame"]
+                del data["stop_frame"]
+                del data["frame_filter"]
+
             return self._prepare_data_meta(data)
 
         task = serialize_task()
 
@@ -49,7 +49,7 @@ services:
       IMPORT_EXPORT_BUCKET: "importexportbucket"
     volumes:
       - ./tests/cypress/e2e/actions_tasks/assets/case_65_manifest/:/mnt/images_with_manifest:ro
-      - ./tests/mounted_file_share/pcd_with_related/:/mnt/pcd_with_related:ro
+      - ./tests/mounted_file_share/:/mnt/mounted_file_share:ro
     networks:
       - cvat
     entrypoint: >
@@ -64,7 +64,7 @@ services:
           else
             FULL_PATH=$${BUCKET};
           fi
-          $${MC_PATH} cp --recursive /mnt/ $${FULL_PATH};
+          $${MC_PATH} cp --recursive /mnt/mounted_file_share/* /mnt/images_with_manifest/ $${FULL_PATH};
           for i in 1 2;
           do
               $${MC_PATH} cp /mnt/images_with_manifest/manifest.jsonl $${FULL_PATH}/images_with_manifest/manifest_$${i}.jsonl;
 
@@ -1301,10 +1301,6 @@ def test_cannot_export_backup_for_task_without_data(self, tasks):
     @pytest.mark.with_external_services
     @pytest.mark.parametrize("lightweight_backup", [True, False])
     def test_can_export_and_import_backup_task_with_cloud_storage(self, lightweight_backup):
-        cloud_storage_content = [
-            "images_with_manifest/image_case_65_1.png",
-            "images_with_manifest/image_case_65_2.png",
-        ]
         task_spec = {
             "name": "Task with files from cloud storage",
             "labels": [
@@ -1317,7 +1313,10 @@ def test_can_export_and_import_backup_task_with_cloud_storage(self, lightweight_
             "image_quality": 75,
             "use_cache": False,
             "cloud_storage_id": 1,
-            "server_files": cloud_storage_content,
+            "server_files": [f"images/image_{i}.jpg" for i in range(0, 6)],
+            "start_frame": 1,
+            "stop_frame": 4,
+            "frame_filter": "step=2",
         }
         task_id, _ = create_task(self.user, task_spec, data_spec)
 
@@ -1335,7 +1334,7 @@ def test_can_export_and_import_backup_task_with_cloud_storage(self, lightweight_
 
         expected_media = {"manifest.jsonl"}
         if not lightweight_backup:
-            expected_media.update(cloud_storage_content)
+            expected_media.update(["images/image_1.jpg", "images/image_3.jpg"])
         assert files_in_data == expected_media
 
         self._test_can_restore_task_from_backup(task_id, lightweight_backup=lightweight_backup)
@@ -1401,9 +1400,20 @@ def _test_can_restore_task_from_backup(self, task_id: int, lightweight_backup: b
         exclude_regex_paths = [r"root\['chunks_updated_date'\]"]  # must be different
 
         if old_meta["storage"] == "cloud_storage":
-            assert new_meta["storage"] == ("cloud_storage" if lightweight_backup else "local")
             assert new_meta["cloud_storage_id"] is None
-            exclude_regex_paths.extend([r"root\['cloud_storage_id'\]", r"root\['storage'\]"])
+            exclude_regex_paths.append(r"root\['cloud_storage_id'\]")
+
+            if not lightweight_backup:
+                assert new_meta["storage"] == "local"
+                assert new_meta["start_frame"] == 0
+                assert new_meta["stop_frame"] == len(old_meta["frames"]) - 1
+                assert new_meta["frame_filter"] == ""
+                exclude_regex_paths += [
+                    r"root\['storage'\]",
+                    r"root\['start_frame'\]",
+                    r"root\['stop_frame'\]",
+                    r"root\['frame_filter'\]",
+                ]
 
         assert (
             DeepDiff(