[BugFix] Fix collector devices

vmoens · vmoens · commit 888095fd538a · 2025-10-25T09:23:40.000-07:00
ghstack-source-id: 21eb8cc Pull-Request: #3223
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -79,6 +79,7 @@
     RandomPolicy,
 )
 from torchrl.modules import Actor, OrnsteinUhlenbeckProcessModule, SafeModule
+from torchrl.weight_update import SharedMemWeightSyncScheme
 
 if os.getenv("PYTORCH_TEST_FBCODE"):
     IS_FB = True
@@ -3835,14 +3836,26 @@ def all_worker_ids(self) -> list[int] | list[torch.device]:
 
     @pytest.mark.skipif(not _has_cuda, reason="requires cuda another device than CPU.")
     @pytest.mark.skipif(not _has_gym, reason="requires gym")
-    def test_weight_update(self):
+    @pytest.mark.parametrize(
+        "weight_updater", ["scheme_shared", "scheme_pipe", "weight_updater"]
+    )
+    def test_weight_update(self, weight_updater):
         device = "cuda:0"
         env_maker = lambda: GymEnv(PENDULUM_VERSIONED(), device="cpu")
         policy_factory = lambda: TensorDictModule(
-            nn.Linear(3, 1), in_keys=["observation"], out_keys=["action"]
-        ).to(device)
+            nn.Linear(3, 1, device=device), in_keys=["observation"], out_keys=["action"]
+        )
         policy = policy_factory()
         policy_weights = TensorDict.from_module(policy)
+        if weight_updater == "scheme_shared":
+            kwargs = {"weight_sync_schemes": {"policy": SharedMemWeightSyncScheme()}}
+        elif weight_updater == "scheme_pipe":  # pipe transport, not shared memory
+            from torchrl.weight_update import MultiProcessWeightSyncScheme
+            kwargs = {"weight_sync_schemes": {"policy": MultiProcessWeightSyncScheme()}}
+        elif weight_updater == "weight_updater":
+            kwargs = {"weight_updater": self.MPSWeightUpdaterBase(policy_weights, 2)}
+        else:
+            raise NotImplementedError
 
         collector = MultiSyncDataCollector(
             create_env_fn=[env_maker, env_maker],
@@ -3854,7 +3867,7 @@ def test_weight_update(self):
             reset_at_each_iter=False,
             device=device,
             storing_device="cpu",
-            weight_updater=self.MPSWeightUpdaterBase(policy_weights, 2),
+            **kwargs,
         )
 
         collector.update_policy_weights_()
diff --git a/torchrl/weight_update/weight_sync_schemes.py b/torchrl/weight_update/weight_sync_schemes.py
@@ -687,36 +687,14 @@ def apply_weights(self, destination: Any, weights: Any) -> None:
 
         # Auto-detect format from weights type
         if isinstance(weights, dict):
-            # Apply state_dict format
-            if isinstance(destination, nn.Module):
-                destination.load_state_dict(weights)
-            elif isinstance(destination, dict):
-                destination = TensorDict(destination)
-                weights = TensorDict(weights)
-                destination.data.update_(weights.data)
-            elif isinstance(destination, TensorDictBase):
-                weights_td = TensorDict(weights)
-                if (dest_keys := sorted(destination.keys(True, True))) != sorted(
-                    weights.keys(True, True)
-                ):
-                    weights_td = weights_td.unflatten_keys(".")
-                    weights_keys = sorted(weights_td.keys(True, True))
-                    if dest_keys != weights_keys:
-                        raise ValueError(
-                            f"The keys of the weights and destination do not match: {dest_keys} != {weights_keys}"
-                        )
-                destination.data.update_(weights_td.data)
-            else:
-                raise ValueError(
-                    f"Unsupported destination type for state_dict: {type(destination)}"
-                )
-        elif isinstance(weights, TensorDictBase):
+            weights = TensorDict(weights).unflatten_keys(".")
+
+        if isinstance(weights, TensorDictBase):
             # Apply TensorDict format
             if isinstance(destination, nn.Module):
-                weights.to_module(destination)
-            elif isinstance(destination, TensorDictBase):
-                destination.data.update_(weights.data)
-            elif isinstance(destination, dict):
+                destination = TensorDict.from_module(destination)
+
+            if isinstance(destination, dict):
                 destination_td = TensorDict(destination)
                 if (dest_keys := sorted(destination_td.keys(True, True))) != sorted(
                     weights.keys(True, True)
@@ -727,11 +705,15 @@ def apply_weights(self, destination: Any, weights: Any) -> None:
                         raise ValueError(
                             f"The keys of the weights and destination do not match: {dest_keys} != {weights_keys}"
                         )
-                destination_td.data.update_(weights.data)
+                destination = destination_td
+
+            if isinstance(destination, TensorDictBase):
+                destination.data.update_(weights.data)
             else:
                 raise ValueError(
                     f"Unsupported destination type for TensorDict: {type(destination)}"
                 )
+
         else:
             raise ValueError(
                 f"Unsupported weights type: {type(weights)}. Expected dict or TensorDictBase."