[torchax] Support for JittableModule::state_dict()

zmelumian · zmelumian · commit 0c045cbf2586 · 2025-05-20T14:08:10.000+03:00
diff --git a/torchax/test/test_statedict.py b/torchax/test/test_statedict.py
@@ -0,0 +1,36 @@
+
+    
+import unittest
+import torch
+from torch.utils import _pytree as pytree
+
+from torchax import (
+    interop,
+    mesh_util,
+    tensor
+)
+
+
+class Model(torch.nn.Module):
+    """Minimal module wrapping a single 10 -> 5 linear layer (test fixture)."""
+
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(in_features=10, out_features=5)
+
+    def forward(self, x):
+        """Apply the linear layer to ``x`` and return the result."""
+        return self.linear(x)
+    
+
+class TestTensorStateDict(unittest.TestCase):
+    """Checks on the state dict returned by ``interop.JittableModule``."""
+
+    def test_load_statedict(self):
+        """``cpu_state_dict()`` must not return any ``tensor.Tensor`` leaf."""
+        mesh = mesh_util.Mesh.fsdp_mesh()
+        model = mesh.initialize_model_sharded(Model, ())
+        model = interop.JittableModule(model)
+        state_dict = model.cpu_state_dict()
+        # Flatten the pytree instead of calling ``.values()`` on the mapped
+        # result, so leaves nested inside containers are inspected as well
+        # (a non-empty nested container of booleans would otherwise be truthy).
+        leaves = pytree.tree_leaves(state_dict)
+        xla_leaf_types = [
+            type(leaf).__name__
+            for leaf in leaves
+            if isinstance(leaf, tensor.Tensor)
+        ]
+        # Use a unittest assertion: a bare ``assert`` is stripped under
+        # ``python -O`` and would let the test pass without checking anything.
+        self.assertEqual(
+            xla_leaf_types, [], "State dict should not contain XLA tensors"
+        )
+
+# Run the tests in this file when it is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torchax/torchax/interop.py b/torchax/torchax/interop.py
@@ -125,6 +125,14 @@ def call(*args, **kwargs):
       return jitted(self.params, self.buffers, *args, **kwargs)
 
     self._jitted[key] = call
+    
+  def cpu_state_dict(self, *args, **kwargs):
+    """Return the parent class's ``state_dict`` with ``.cpu()`` applied to leaves.
+
+    Args:
+      *args: Forwarded unchanged to ``super().state_dict``.
+      **kwargs: Forwarded unchanged to ``super().state_dict``.
+
+    Returns:
+      A pytree with the same structure as ``super().state_dict(...)`` in which
+      every leaf exposing a ``cpu`` attribute is replaced by ``leaf.cpu()``;
+      any other leaf is returned unchanged.
+    """
+
+    def _to_cpu(leaf):
+      # Leaves without ``.cpu()`` (for example ``None`` or non-tensor extra
+      # state) are passed through instead of raising AttributeError.
+      return leaf.cpu() if hasattr(leaf, "cpu") else leaf
+
+    return pytree.tree_map(_to_cpu, super().state_dict(*args, **kwargs))
 
 
 class CompileMixin: