30 changes: 17 additions & 13 deletions fiftyone/core/models.py
@@ -37,6 +37,8 @@
foutr = fou.lazy_import("fiftyone.utils.transformers")
fouu = fou.lazy_import("fiftyone.utils.ultralytics")

foray = fou.lazy_import("fiftyone.core.ray.base")
foray_writers = fou.lazy_import("fiftyone.core.ray.writers")

logger = logging.getLogger(__name__)

@@ -459,7 +461,18 @@ def _apply_image_model_data_loader(

with contextlib.ExitStack() as context:
pb = context.enter_context(fou.ProgressBar(samples, progress=progress))
ctx = context.enter_context(foc.SaveContext(samples))
output_processor = model._output_processor
ctx = context.enter_context(
foray.ActorPoolContext(
samples,
foray_writers.LabelWriter,
num_workers=16,
label_field=label_field,
confidence_thresh=confidence_thresh,
post_processor=output_processor,
)
)
context.enter_context(fou.SetAttributes(model, _output_processor=None))
Contributor

⚠️ Potential issue

Clearing _output_processor may cause issues

Setting _output_processor=None on the model modifies its internal state, which could cause problems if the model is used elsewhere or if an error occurs before it's restored.

The current approach modifies the model's internal state, which could lead to:

  1. Thread safety issues if the model is used concurrently
  2. Incorrect behavior if an exception occurs before restoration
  3. Unexpected side effects for other code using the same model instance

Consider passing the output processor directly to the actor pool without modifying the model.
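
One possible shape for that, sketched under the assumption that these model classes tolerate a shallow copy and that _output_processor is a plain instance attribute (not verified against the FiftyOne codebase):

import copy

# Hypothetical sketch: work on a shallow copy so the shared `model` instance is
# never mutated; the copy yields raw outputs for the actor pool, which already
# receives post_processor=output_processor above
raw_model = copy.copy(model)        # shares the underlying weights
raw_model._output_processor = None  # only the copy skips post-processing

# ... then use raw_model.predict_all(...) in the batch loop below instead of model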


for sample_batch, imgs in zip(
fou.iter_batches(samples, batch_size),
@@ -470,22 +483,13 @@
raise imgs

if needs_samples:
labels_batch = model.predict_all(
ids, labels_batch = model.predict_all(
imgs, samples=sample_batch
)
else:
labels_batch = model.predict_all(imgs)

for sample, labels in zip(sample_batch, labels_batch):
if filename_maker is not None:
_export_arrays(labels, sample.filepath, filename_maker)
ids, labels_batch = model.predict_all(imgs)

sample.add_labels(
labels,
label_field=label_field,
confidence_thresh=confidence_thresh,
)
ctx.save(sample)
ctx.submit(ids, labels_batch)
Comment on lines +486 to +492
Contributor

⚠️ Potential issue

Breaking change in predict_all return signature

The change from labels_batch = model.predict_all(...) to ids, labels_batch = model.predict_all(...) is a breaking change that affects the model interface contract.

This needs careful coordination:

  1. All model implementations must be updated to return the tuple format
  2. Consider backward compatibility for existing models
  3. Update documentation and examples
# Consider adding compatibility wrapper:
def _predict_all_compat(model, imgs, samples=None):
    """Wrapper to handle both old and new predict_all signatures."""
    result = model.predict_all(imgs, samples=samples) if samples else model.predict_all(imgs)
    if isinstance(result, tuple) and len(result) == 2:
        return result  # New format: (ids, predictions)
    else:
        # Old format: just predictions, extract IDs from imgs if available
        ids = imgs.get("_id", None)
        return ids, result
🤖 Prompt for AI Agents
In fiftyone/core/models.py around lines 486-492, the code now expects
model.predict_all to return (ids, labels_batch) which is a breaking change for
existing model implementations; add a compatibility wrapper function (e.g.,
_predict_all_compat) that calls model.predict_all with the same args, detects
whether the result is a two-tuple or a single value, and if single value
constructs/derives ids (from provided samples or imgs metadata) and returns
(ids, labels); replace direct calls to model.predict_all at these lines with the
wrapper; update the predict_all callers' docstrings to state the new tuple
return format and run tests to ensure existing models continue to work, updating
any model implementations that must explicitly return the new tuple.


except Exception as e:
if not skip_failures:
4 changes: 4 additions & 0 deletions fiftyone/core/ray/__init__.py
@@ -0,0 +1,4 @@
import ray

if not ray.is_initialized():
ray.init()
Comment on lines +3 to +4
Contributor

⚠️ Potential issue

Critical: Ray initialization at import time can cause issues

Initializing Ray at module import time is problematic for several reasons:

  1. It prevents users from configuring Ray parameters before initialization
  2. It can interfere with testing and development environments
  3. It forces Ray initialization even when Ray features aren't being used
  4. No error handling means import failures will crash the entire module

Consider lazy initialization or providing an explicit initialization function:

 import ray
 
-if not ray.is_initialized():
-    ray.init()
+def ensure_ray_initialized(**kwargs):
+    """Initialize Ray if not already initialized.
+    
+    Args:
+        **kwargs: Optional Ray initialization parameters
+    """
+    if not ray.is_initialized():
+        ray.init(**kwargs)

Then call this function only when Ray features are actually needed, such as in ActorPoolContext.__init__.
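
A minimal sketch of that wiring, assuming the ensure_ray_initialized() helper proposed above is added to fiftyone/core/ray/__init__.py:

import ray

from fiftyone.core.ray import ensure_ray_initialized  # assumed helper from above


class ActorPoolContext:
    def __init__(self, samples, actor_type, *args, num_workers=4, **kwargs):
        # Ray is only touched when a pool is actually constructed, so callers
        # can configure (or call ray.init() themselves) beforehand
        ensure_ray_initialized()
        self.serialized_samples_ref = ray.put(serialize_samples(samples))
        self.num_workers = num_workers
        # ... remainder unchanged from the diff above (serialize_samples lives
        # in this same module, fiftyone/core/ray/base.py)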

🤖 Prompt for AI Agents
In fiftyone/core/ray/__init__.py around lines 3-4, don't call ray.init() during
module import; instead remove the top-level initialization and add a
lazy/explicit initializer function (e.g., ensure_ray_initialized(**opts) or
init_ray(config)) that checks ray.is_initialized(), calls ray.init(...) with
passed parameters, and catches/logs exceptions; update callers (notably
ActorPoolContext.__init__) to call this initializer when Ray features are
actually required, allowing callers to pass configuration and avoiding
import-time side effects.

72 changes: 72 additions & 0 deletions fiftyone/core/ray/base.py
@@ -0,0 +1,72 @@
import ray

import fiftyone.core.view as fov


def serialize_samples(samples):
Contributor

This is not a good pattern because you're guaranteeing that the process will require a database connection just to resolve the file path. It would be much better to just resolve the file path directly.

Contributor Author

What file paths are we talking about?

Generally speaking, we want these workers to have a database connection. One of the goals is for them to interact with FO datasets in parallel to the main process.

Contributor

No, you don't want to give workers database connections. There is zero benefit because the data (media) is not even in the database and cannot be retrieved using the database connection.

Contributor Author

These are writer workers. They must each hold some connection or share access to a pool of connections for us to write multiple things in parallel. Frankly we don't even need a ton of them because most of what they do is sit around waiting for I/O, so a single one that's multithreaded would probably be just fine.

Unrelated grievances with our multi-worker read system can be discussed elsewhere.
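
For illustration only, a rough sketch of the single multithreaded-writer alternative mentioned above, using a plain thread pool instead of a Ray actor pool (the class name is hypothetical, and using one SaveContext per write like this being thread-safe is an assumption, not something verified here):

from concurrent.futures import ThreadPoolExecutor

import fiftyone.core.collections as foc


class ThreadedLabelWriter:
    """Hypothetical single writer; saves overlap because each batch mostly waits on I/O."""

    def __init__(self, samples, label_field, num_threads=8):
        self.samples = samples
        self.label_field = label_field
        self._executor = ThreadPoolExecutor(max_workers=num_threads)

    def submit(self, ids, labels_batch):
        # schedule the write without blocking the inference loop
        self._executor.submit(self._write, ids, labels_batch)

    def _write(self, ids, labels_batch):
        # one SaveContext per write to avoid sharing a context across threads
        with foc.SaveContext(self.samples) as ctx:
            for sample, labels in zip(self.samples.select(ids), labels_batch):
                sample.add_labels(labels, label_field=self.label_field)
                ctx.save(sample)

    def close(self):
        self._executor.shutdown(wait=True)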

dataset_name = samples._root_dataset.name
stages = (
samples._serialize() if isinstance(samples, fov.DatasetView) else None
)
return dataset_name, stages


def deserialize_samples(serialized_samples):
import fiftyone as fo

dataset_name, stages = serialized_samples

dataset = fo.load_dataset(dataset_name)
if stages is not None:
return fov.DatasetView._build(dataset, stages)
return dataset


class FiftyOneActor:
"""Class for FiftyOne Ray actors.
Args:
serialized_samples: a serialized representation of a
:class:`fiftyone.core.collections.SampleCollection`
"""

def __init__(self, serialized_samples, **kwargs):
super().__init__(**kwargs)
self.samples = deserialize_samples(serialized_samples)
Comment on lines +33 to +35
Contributor

⚠️ Potential issue

Do not forward arbitrary kwargs to object.__init__ (will raise TypeError).

super().__init__(**kwargs) on a plain Python class will error because object.__init__ accepts no keyword args. Keep kwargs for subclasses but don't pass them to super().

Apply this diff:

 def __init__(self, serialized_samples, **kwargs):
-        super().__init__(**kwargs)
-        self.samples = deserialize_samples(serialized_samples)
+        # Don't pass kwargs to object.__init__ (TypeError). Store for subclasses.
+        super().__init__()
+        self.samples = deserialize_samples(serialized_samples)
+        self._init_kwargs = kwargs
🤖 Prompt for AI Agents
In fiftyone/core/ray/base.py around lines 33 to 35, the constructor forwards
arbitrary kwargs to super().__init__(**kwargs) which will call object.__init__
and raise TypeError; change the call to super().__init__() (no kwargs) and, if
subclasses need kwargs, accept and store them or pass them selectively to
subclass super calls rather than to object; ensure
deserialize_samples(serialized_samples) remains assigned to self.samples and do
not forward kwargs to object.__init__.



class ActorPoolContext:
"""Context manager for a pool of Ray actors.
Args:
samples: a :class:`fiftyone.core.collections.SampleCollection`
actor_type: the :class:`FiftyOneActor` subclass to instantiate
for each worker
num_workers (int): the number of workers in the pool
"""

def __init__(self, samples, actor_type, *args, num_workers=4, **kwargs):
super().__init__()
self.serialized_samples_ref = ray.put(serialize_samples(samples))
self.num_workers = num_workers
self.actor_type = actor_type
self.actors = [
self.actor_type.remote(
self.serialized_samples_ref, *args, **kwargs
)
for _ in range(self.num_workers)
]
self.pool = ray.util.ActorPool(self.actors)

def __enter__(self):
return self

def __exit__(self, *args):
# Clean up refs
for actor in self.actors:
del actor

del self.serialized_samples_ref

Comment on lines +64 to +70
Contributor

🛠️ Refactor suggestion

Ensure graceful teardown: drain pending results and terminate Ray actors.

Deleting Python references does not stop Ray actors; processes continue until the cluster GC kills them. Drain pending results to avoid backpressure, then explicitly terminate actors.

Apply this diff:

 def __exit__(self, *args):
-        # Clean up refs
-        for actor in self.actors:
-            del actor
-
-        del self.serialized_samples_ref
+        # Drain any pending results so ActorPool marks actors idle
+        try:
+            while hasattr(self.pool, "has_next") and self.pool.has_next():
+                self.pool.get_next_unordered()
+        except Exception:
+            pass
+        # Explicitly terminate remote actors
+        for actor in getattr(self, "actors", []):
+            try:
+                ray.kill(actor)
+            except Exception:
+                pass
+        # Release references
+        try:
+            self.actors.clear()
+        except Exception:
+            pass
+        self.pool = None
+        try:
+            del self.serialized_samples_ref
+        except Exception:
+            pass
🤖 Prompt for AI Agents
In fiftyone/core/ray/base.py around lines 64-70, the __exit__ currently just
deletes Python refs which doesn't stop Ray actors; update it to first drain any
pending results (e.g., call ray.get on self.pending_results or otherwise collect
outstanding ObjectRefs) to avoid backpressure, then explicitly terminate each
actor (use ray.kill(actor) or actor.__ray_terminate__() inside a try/except),
and only after successful termination delete serialized refs and local
references; ensure exceptions from get/kill are caught and logged but do not
prevent cleanup.

def submit(self, ids, payloads):
self.pool.submit(lambda a, v: a.run.remote(*v), (ids, payloads))
Comment on lines +71 to +72
Contributor

⚠️ Potential issue

ActorPool will stall without consuming results; opportunistically drain to keep progress.

ActorPool.submit() requires periodically calling get_next(_unordered) to mark actors idle. Without consuming results, only the first num_workers tasks will run, and further submissions will queue indefinitely.

Apply this diff to opportunistically release one completed task per submit:

 def submit(self, ids, payloads):
-        self.pool.submit(lambda a, v: a.run.remote(*v), (ids, payloads))
+        self.pool.submit(lambda a, v: a.run.remote(*v), (ids, payloads))
+        # Opportunistically free one finished task so actors re-enter the idle pool
+        if hasattr(self.pool, "has_next") and self.pool.has_next():
+            self.pool.get_next_unordered()

Additionally, add helper methods so callers can drain explicitly (outside this hunk):

# Add to ActorPoolContext class (e.g., after submit)
def get_next(self):
    return self.pool.get_next_unordered()

def drain(self):
    while self.pool.has_next():
        self.pool.get_next_unordered()
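
And a sketch of the caller side in _apply_image_model_data_loader, condensed from the diff above (the name of the second iterable in the zip is assumed, since it is elided in this hunk), draining after the last batch so every queued write lands before the context exits:

for sample_batch, imgs in zip(
    fou.iter_batches(samples, batch_size),
    data_loader,  # assumed name; elided in the hunk above
):
    ids, labels_batch = model.predict_all(imgs)
    ctx.submit(ids, labels_batch)

# block until every outstanding label write has completed
ctx.drain()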
🤖 Prompt for AI Agents
In fiftyone/core/ray/base.py around lines 71-72, ActorPool.submit currently only
enqueues tasks which will stall once the number of outstanding tasks exceeds
num_workers; modify submit to opportunistically release one completed task by
calling the pool's get_next_unordered when has_next() is true immediately after
submitting so an idle actor is marked free, and add two helper methods on
ActorPoolContext (get_next returning pool.get_next_unordered, and drain looping
while pool.has_next() to call pool.get_next_unordered) so callers can explicitly
drain results when needed.

40 changes: 40 additions & 0 deletions fiftyone/core/ray/writers.py
@@ -0,0 +1,40 @@
import ray
import torch

import fiftyone.core.ray.base as foray
import fiftyone.core.collections as foc
from fiftyone.core.ray.base import FiftyOneActor


@ray.remote
class LabelWriter(FiftyOneActor):
def __init__(
self,
serialized_samples,
label_field,
confidence_thresh=None,
post_processor=None,
**kwargs
):
super().__init__(serialized_samples, **kwargs)
self.label_field = label_field
self.confidence_thresh = confidence_thresh
self.post_processor = post_processor
self.ctx = foc.SaveContext(self.samples)

def run(self, ids, payloads):
samples_batch = self.samples.select(ids)

if self.post_processor is not None:
payloads = self.post_processor(
*payloads, confidence_thresh=self.confidence_thresh
)

with self.ctx:
for sample, payload in zip(samples_batch, payloads):
sample.add_labels(
payload,
Contributor

This is what actually needs to be refactored.

Contributor Author

Any optimizations to the base code are welcome. That said, notice that we are offloading multiple things here from the main process, namely:

  1. model output post processing
  2. add_labels
  3. writing to mongo (with ctx.save())

All of these parts have some impact on the final apply_model time.

label_field=self.label_field,
confidence_thresh=self.confidence_thresh,
)
self.ctx.save(sample)
Comment on lines +25 to +40
Contributor

⚠️ Potential issue

Add error handling and validation

The run method lacks error handling, which could cause silent failures in the Ray actor pool. Additionally, there's no validation that the number of IDs matches the number of payloads.

     def run(self, ids, payloads):
+        if ids is None or not ids:
+            return  # Nothing to process
+            
         samples_batch = self.samples.select(ids)
+        
+        # Validate that we got the expected number of samples
+        if len(samples_batch) != len(ids):
+            raise ValueError(
+                f"Expected {len(ids)} samples but got {len(samples_batch)}"
+            )
 
         if self.post_processor is not None:
-            payloads = self.post_processor(
-                *payloads, confidence_thresh=self.confidence_thresh
-            )
+            try:
+                payloads = self.post_processor(
+                    *payloads, confidence_thresh=self.confidence_thresh
+                )
+            except Exception as e:
+                raise RuntimeError(f"Post-processor failed: {e}") from e
+        
+        # Ensure payloads matches samples count after processing
+        if len(payloads) != len(samples_batch):
+            raise ValueError(
+                f"Payload count {len(payloads)} doesn't match sample count {len(samples_batch)}"
+            )
 
         with self.ctx:
             for sample, payload in zip(samples_batch, payloads):
-                sample.add_labels(
-                    payload,
-                    label_field=self.label_field,
-                    confidence_thresh=self.confidence_thresh,
-                )
-                self.ctx.save(sample)
+                try:
+                    sample.add_labels(
+                        payload,
+                        label_field=self.label_field,
+                        confidence_thresh=self.confidence_thresh,
+                    )
+                    self.ctx.save(sample)
+                except Exception as e:
+                    # Log error but continue processing other samples
+                    print(f"Failed to save labels for sample {sample.id}: {e}")
+                    # Or re-raise if you want to fail the entire batch
+                    # raise

14 changes: 9 additions & 5 deletions fiftyone/utils/torch.py
@@ -883,7 +883,8 @@ def predict_all(self, imgs):
of dicts of :class:`fiftyone.core.labels.Label` instances
containing the predictions
"""
return self._predict_all(imgs)
ids = imgs.pop("_id", None)
return ids, self._predict_all(imgs)

def _predict_all(self, imgs):
if self._preprocess and self._transforms is not None:
@@ -921,9 +922,9 @@ def _predict_all(self, imgs):

if self._output_processor is None:
if isinstance(output, torch.Tensor):
output = output.detach().cpu().numpy()
output = output.detach().cpu()

return output
return output, (width, height)
Comment on lines +925 to +927
Contributor

⚠️ Potential issue

Inconsistent return type when no output processor

When there's no output processor, the method returns an (output, (width, height)) tuple instead of the expected label format. This creates an inconsistency in return types that could cause issues for callers expecting uniform behavior.

Consider one of the following (option 2 is sketched after this list):

  1. Documenting this behavior clearly in the method docstring
  2. Ensuring consistent return types regardless of output processor presence
  3. Raising an exception if no output processor is configured when one is expected
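
A minimal sketch of option 2 inside _predict_all, assuming the output processor is invoked as processor(output, image_sizes, ...) as the writer code in this PR suggests:

# hypothetical: make both branches return the same (payload, image_sizes) shape,
# so callers never special-case the "no output processor" configuration
if self._output_processor is None:
    if isinstance(output, torch.Tensor):
        output = output.detach().cpu()
    return output, (width, height)  # raw payload, same tuple shape

labels = self._output_processor(output, (width, height))
return labels, (width, height)  # processed payload, same tuple shape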


if self.has_logits:
self._output_processor.store_logits = self.store_logits
@@ -1913,17 +1914,20 @@ def __getitem__(self, idx):
return self.__getitems__([idx])[0]

def __getitems__(self, indices):
_ids = [self.ids[idx] for idx in indices]
if self.vectorize:
batch = self._prepare_batch_vectorized(indices)
else:
batch = self._prepare_batch_db(indices)

res = []
for d in batch:
for i, d in enumerate(batch):
if isinstance(d, Exception):
res.append(d)
else:
res.append(self._get_item(d))
_processed = self._get_item(d)
_processed.update({"_id": _ids[i]})
res.append(_processed)

return res

16 changes: 15 additions & 1 deletion fiftyone/utils/transformers.py
@@ -681,7 +681,17 @@ def _predict_all(self, args):
)

else:
return output
for k, v in output.items():
Contributor Author

this

if isinstance(v, torch.Tensor):
output[k] = v.detach().cpu()
elif isinstance(v, (tuple, list)):
output[k] = [
i.detach().cpu()
for i in v
if isinstance(i, torch.Tensor)
]

return output, image_sizes

def _forward_pass(self, args):
return self._model(
@@ -709,6 +719,10 @@ def collate_fn(batch):
keys = batch[0].keys()
res = {}
for k in keys:
if not isinstance(batch[0][k], (torch.Tensor, np.ndarray)):
# not a tensor, just return the list
res[k] = [b[k] for b in batch]
continue
# Gather shapes for dimension analysis
shapes = [b[k].shape for b in batch]
# Find the max size in each dimension