
Commit 742f30b

Merge pull request #2611 from bghira/bugfix/flux2-ramtorch
flux2: fix ramtorch validation by checking device location correctly
2 parents 0282f0b + 14b1ba9

3 files changed: 57 additions & 1 deletion

simpletuner/helpers/models/wan/model.py

Lines changed: 2 additions & 1 deletion
@@ -1357,10 +1357,11 @@ def _fixed_execution_device(self):
     """
     Fixed _execution_device property that returns the transformer device instead of meta.
     This fixes the issue when text encoder is moved to meta but transformer is on GPU.
+    Uses .device property (not raw parameter device) so ramtorch-aware patches apply.
     """
     # If we have a transformer and it's not on meta, use its device
     if hasattr(self, "transformer") and self.transformer is not None:
-        transformer_device = next(self.transformer.parameters()).device
+        transformer_device = self.transformer.device
         if transformer_device.type != "meta":
             return transformer_device
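Why this matters: `next(model.parameters()).device` reports where the weights physically live, which under ramtorch-style offloading is CPU, while a patched `.device` property can report the target GPU. A minimal, self-contained sketch of that distinction (the `ToyTransformer` class and the `cuda:0` target are illustrative, not from the commit):

```python
import torch
from torch import nn


class ToyTransformer(nn.Module):
    """Hypothetical stand-in for a ramtorch-managed transformer."""

    def __init__(self):
        super().__init__()
        # Under offloading, the raw weights sit on CPU between uses.
        self.proj = nn.Linear(4, 4)

    @property
    def device(self) -> torch.device:
        # A ramtorch-aware patch reports the *target* device,
        # not the current parameter location.
        return torch.device("cuda:0")


model = ToyTransformer()
print(next(model.parameters()).device)  # cpu     -- raw parameter lookup
print(model.device)                     # cuda:0  -- patched property
```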

simpletuner/helpers/ramtorch_extensions.py

Lines changed: 48 additions & 0 deletions
@@ -789,3 +789,51 @@ def remove_ramtorch_sync_hooks(hooks: list) -> None:
     """Remove synchronization hooks added by add_ramtorch_sync_hooks."""
     for h in hooks:
         h.remove()
+
+
+def get_ramtorch_target_device(model: nn.Module) -> torch.device | None:
+    """Return the target GPU device from a model's ramtorch modules, or None.
+
+    Returns the device of the first ramtorch module found. All ramtorch
+    modules within a model share the same target device because
+    ``replace_linear_layers_with_ramtorch`` applies a single ``device``
+    argument to every replaced layer.
+
+    Returns ``None`` when the model contains no ramtorch modules.
+    """
+    for m in model.modules():
+        if getattr(m, "is_ramtorch", False):
+            dev = m.device
+            return torch.device(dev) if isinstance(dev, str) else dev
+    return None
+
+
+_model_device_patched = False
+
+
+def patch_model_device_for_ramtorch():
+    """
+    Patch ModelMixin.device so it returns the ramtorch target GPU device
+    instead of CPU when ramtorch modules are present.
+
+    This single patch fixes:
+    - DiffusionPipeline._execution_device (delegates to pipeline.device -> model.device)
+    - Direct self.transformer.device / self.unet.device references in pipeline code
+    """
+    global _model_device_patched
+    if _model_device_patched:
+        return
+    _model_device_patched = True
+
+    from diffusers import ModelMixin
+
+    original_device = ModelMixin.device
+
+    @property
+    def device(self) -> torch.device:
+        dev = get_ramtorch_target_device(self)
+        if dev is not None:
+            return dev
+        return original_device.fget(self)
+
+    ModelMixin.device = device
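For context, the same patching pattern can be demonstrated without importing diffusers. The sketch below substitutes a hypothetical `BaseModel` for `ModelMixin` and a per-instance `is_ramtorch` flag for the module walk in `get_ramtorch_target_device`; it is a simplified illustration, not the commit's code:

```python
import torch
from torch import nn


class BaseModel(nn.Module):
    """Stand-in for diffusers' ModelMixin, for illustration only."""

    @property
    def device(self) -> torch.device:
        return next(self.parameters()).device  # the "original" behaviour


def patch_device(target_device: torch.device):
    """Same pattern as patch_model_device_for_ramtorch, on the stand-in class."""
    original_device = BaseModel.device  # keep the original property object

    @property
    def device(self) -> torch.device:
        # Prefer the ramtorch target; otherwise defer to the original getter.
        if getattr(self, "is_ramtorch", False):
            return target_device
        return original_device.fget(self)

    BaseModel.device = device


class Net(BaseModel):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(2, 2)


patch_device(torch.device("cuda:0"))
net = Net()
print(net.device)        # cpu     -- no ramtorch flag, original property applies
net.is_ramtorch = True
print(net.device)        # cuda:0  -- patched branch takes over
```

Keeping a reference to the original property object and calling `original_device.fget(self)` in the fallback is what lets the patch stay transparent for non-ramtorch models.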

simpletuner/helpers/training/validation.py

Lines changed: 7 additions & 0 deletions
@@ -2131,6 +2131,13 @@ def setup_pipeline(self, validation_type):
             if getattr(te, "device", None) and te.device.type == "meta":
                 setattr(self.model.pipeline, attr, None)
 
+        # Patch ModelMixin.device so ramtorch models report the target GPU instead of CPU.
+        # Must run before pipeline.to() and pipeline.__call__ which rely on device detection.
+        if getattr(self.config, "ramtorch", False):
+            from simpletuner.helpers.ramtorch_extensions import patch_model_device_for_ramtorch
+
+            patch_model_device_for_ramtorch()
+
         # For FSDP models, skip .to() call - DTensor parameters are already device-aware
         # and calling .to() causes: "RuntimeError: Attempted to set the storage of a tensor
         # on device 'cpu' to a storage on different device 'cuda:0'"
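Because `setup_pipeline` runs on every validation pass, this call is safe to repeat: the module-level `_model_device_patched` flag makes the patch idempotent. A minimal sketch of that guard (`patch_once` is a hypothetical reduction of `patch_model_device_for_ramtorch`):

```python
_patched = False


def patch_once() -> bool:
    """Apply a one-time patch; return True only on the first call."""
    global _patched
    if _patched:
        return False
    _patched = True
    # ... perform the actual monkey-patch here ...
    return True


assert patch_once() is True    # first validation run applies the patch
assert patch_once() is False   # later runs are no-ops
```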
