Commit 48211cb (2 parents: 1dbb367 + e0ea53b)

Update

[ghstack-poisoned]

File tree: 4 files changed, +91 −197 lines

examples/collectors/weight_sync_collectors.py

Lines changed: 34 additions & 34 deletions
@@ -17,7 +17,7 @@
 import torch.nn as nn
 from tensordict import TensorDict
 from tensordict.nn import TensorDictModule
-from torchrl.collectors import SyncDataCollector, MultiSyncDataCollector
+from torchrl.collectors import MultiSyncDataCollector, SyncDataCollector
 from torchrl.envs import GymEnv
 from torchrl.weight_update import (
     MultiProcessWeightSyncScheme,
@@ -27,25 +27,24 @@
 
 def example_single_collector_multiprocess():
     """Example 1: Single collector with multiprocess scheme."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Example 1: Single Collector with Multiprocess Scheme")
-    print("="*70)
-
+    print("=" * 70)
+
     # Create environment and policy
     env = GymEnv("CartPole-v1")
     policy = TensorDictModule(
         nn.Linear(
-            env.observation_spec["observation"].shape[-1],
-            env.action_spec.shape[-1]
+            env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
         ),
         in_keys=["observation"],
         out_keys=["action"],
     )
     env.close()
-
+
     # Create weight sync scheme
     scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
-
+
     print("Creating collector with multiprocess weight sync...")
     collector = SyncDataCollector(
         create_env_fn=lambda: GymEnv("CartPole-v1"),
@@ -54,46 +53,45 @@ def example_single_collector_multiprocess():
         total_frames=200,
         weight_sync_schemes={"policy": scheme},
     )
-
+
     # Collect data and update weights periodically
     print("Collecting data...")
     for i, data in enumerate(collector):
         print(f"Iteration {i}: Collected {data.numel()} transitions")
-
+
         # Update policy weights every 2 iterations
         if i % 2 == 0:
             new_weights = policy.state_dict()
            collector.update_policy_weights_(new_weights)
             print(" → Updated policy weights")
-
+
         if i >= 2:  # Just run a few iterations for demo
             break
-
+
     collector.shutdown()
     print("✓ Single collector example completed!\n")
 
 
 def example_multi_collector_shared_memory():
     """Example 2: Multiple collectors with shared memory."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Example 2: Multiple Collectors with Shared Memory")
-    print("="*70)
-
+    print("=" * 70)
+
     # Create environment and policy
     env = GymEnv("CartPole-v1")
     policy = TensorDictModule(
         nn.Linear(
-            env.observation_spec["observation"].shape[-1],
-            env.action_spec.shape[-1]
+            env.observation_spec["observation"].shape[-1], env.action_spec.shape[-1]
        ),
         in_keys=["observation"],
         out_keys=["action"],
     )
     env.close()
-
+
     # Shared memory is more efficient for frequent updates
     scheme = SharedMemWeightSyncScheme(strategy="tensordict", auto_register=True)
-
+
     print("Creating multi-collector with shared memory...")
     collector = MultiSyncDataCollector(
         create_env_fn=[
@@ -106,49 +104,51 @@ def example_multi_collector_shared_memory():
         total_frames=400,
         weight_sync_schemes={"policy": scheme},
     )
-
+
     # Workers automatically see weight updates via shared memory
     print("Collecting data...")
     for i, data in enumerate(collector):
         print(f"Iteration {i}: Collected {data.numel()} transitions")
-
+
         # Update weights frequently (shared memory makes this very fast)
         collector.update_policy_weights_(TensorDict.from_module(policy))
         print(" → Updated policy weights via shared memory")
-
+
         if i >= 1:  # Just run a couple iterations for demo
             break
-
+
     collector.shutdown()
     print("✓ Multi-collector with shared memory example completed!\n")
 
 
 def main():
     """Run all examples."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Weight Synchronization Schemes - Collector Integration Examples")
-    print("="*70)
-
+    print("=" * 70)
+
     # Set multiprocessing start method
     import torch.multiprocessing as mp
+
     try:
-        mp.set_start_method('spawn')
+        mp.set_start_method("spawn")
     except RuntimeError:
         pass  # Already set
-
+
     # Run examples
     example_single_collector_multiprocess()
     example_multi_collector_shared_memory()
-
-    print("\n" + "="*70)
+
+    print("\n" + "=" * 70)
     print("All examples completed successfully!")
-    print("="*70)
+    print("=" * 70)
     print("\nKey takeaways:")
     print(" • MultiProcessWeightSyncScheme: Good for general multiprocess scenarios")
-    print(" • SharedMemWeightSyncScheme: Fast zero-copy updates for same-machine workers")
-    print("="*70 + "\n")
+    print(
+        " • SharedMemWeightSyncScheme: Fast zero-copy updates for same-machine workers"
+    )
+    print("=" * 70 + "\n")
 
 
 if __name__ == "__main__":
     main()
-
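Distilled, the integration pattern this file demonstrates is: construct a scheme, pass it to the collector under weight_sync_schemes, and call update_policy_weights_() between batches. The following minimal sketch restates that pattern outside the diff; it is not part of the commit, and the frames_per_batch value is an assumption, since the hunks above elide that argument.

# Minimal sketch of the collector-side pattern shown in the diff above.
import torch.nn as nn
from tensordict.nn import TensorDictModule
from torchrl.collectors import SyncDataCollector
from torchrl.envs import GymEnv
from torchrl.weight_update import MultiProcessWeightSyncScheme

# CartPole-v1 has 4 observation features and 2 discrete actions.
policy = TensorDictModule(
    nn.Linear(4, 2), in_keys=["observation"], out_keys=["action"]
)
collector = SyncDataCollector(
    create_env_fn=lambda: GymEnv("CartPole-v1"),
    policy=policy,
    frames_per_batch=50,  # assumed value; this argument is not shown in the hunks
    total_frames=200,
    weight_sync_schemes={"policy": MultiProcessWeightSyncScheme(strategy="state_dict")},
)
for data in collector:
    # ...an optimizer step on `policy` would go here...
    collector.update_policy_weights_(policy.state_dict())  # push weights to workers
collector.shutdown()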
examples/collectors/weight_sync_standalone.py

Lines changed: 54 additions & 48 deletions
@@ -16,8 +16,8 @@
 
 import torch
 import torch.nn as nn
-from torch import multiprocessing as mp
 from tensordict import TensorDict
+from torch import multiprocessing as mp
 from torchrl.weight_update import (
     MultiProcessWeightSyncScheme,
     SharedMemWeightSyncScheme,
@@ -27,21 +27,21 @@
 def worker_process_mp(child_pipe, model_state):
     """Worker process that receives weights via multiprocessing pipe."""
     print("Worker: Starting...")
-
+
     # Create a policy on the worker side
     policy = nn.Linear(4, 2)
     with torch.no_grad():
         policy.weight.fill_(0.0)
         policy.bias.fill_(0.0)
-
+
     # Create receiver and register the policy
     scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
     receiver = scheme.create_receiver()
     receiver.register_model(policy)
     receiver.register_worker_transport(child_pipe)
-
+
     print(f"Worker: Before update - weight sum: {policy.weight.sum().item():.4f}")
-
+
     # Receive and apply weights
     result = receiver._transport.receive_weights(timeout=5.0)
     if result is not None:
@@ -50,19 +50,19 @@ def worker_process_mp(child_pipe, model_state):
         print(f"Worker: After update - weight sum: {policy.weight.sum().item():.4f}")
     else:
         print("Worker: No weights received")
-
+
     # Store final state for verification
-    model_state['weight_sum'] = policy.weight.sum().item()
-    model_state['bias_sum'] = policy.bias.sum().item()
+    model_state["weight_sum"] = policy.weight.sum().item()
+    model_state["bias_sum"] = policy.bias.sum().item()
 
 
 def worker_process_shared_mem(child_pipe, model_state):
     """Worker process that receives shared memory buffer reference."""
     print("SharedMem Worker: Starting...")
-
+
     # Create a policy on the worker side
     policy = nn.Linear(4, 2)
-
+
     # Wait for shared memory buffer registration
     if child_pipe.poll(timeout=10.0):
         data, msg = child_pipe.recv()
@@ -73,129 +73,135 @@ def worker_process_shared_mem(child_pipe, model_state):
             shared_weights.to_module(policy)
             # Send acknowledgment
             child_pipe.send((None, "registered"))
-
+
     # Small delay to ensure main process updates shared memory
     import time
+
     time.sleep(0.5)
-
+
     print(f"SharedMem Worker: weight sum: {policy.weight.sum().item():.4f}")
-
+
     # Store final state for verification
-    model_state['weight_sum'] = policy.weight.sum().item()
-    model_state['bias_sum'] = policy.bias.sum().item()
+    model_state["weight_sum"] = policy.weight.sum().item()
+    model_state["bias_sum"] = policy.bias.sum().item()
 
 
 def example_multiprocess_sync():
     """Example 1: Multiprocess weight synchronization with state_dict."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Example 1: Multiprocess Weight Synchronization")
-    print("="*70)
-
+    print("=" * 70)
+
     # Create a simple policy on main process
     policy = nn.Linear(4, 2)
     with torch.no_grad():
         policy.weight.fill_(1.0)
         policy.bias.fill_(0.5)
-
+
     print(f"Main: Policy weight sum: {policy.weight.sum().item():.4f}")
-
+
     # Create scheme and sender
     scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
     sender = scheme.create_sender()
-
+
     # Create pipe for communication
     parent_pipe, child_pipe = mp.Pipe()
     sender.register_worker(worker_idx=0, pipe_or_context=parent_pipe)
-
+
     # Start worker process
     manager = mp.Manager()
     model_state = manager.dict()
     process = mp.Process(target=worker_process_mp, args=(child_pipe, model_state))
     process.start()
-
+
     # Send weights to worker
     weights = policy.state_dict()
     print("Main: Sending weights to worker...")
     sender.update_weights(weights)
-
+
     # Wait for worker to complete
     process.join(timeout=10.0)
-
+
     if process.is_alive():
         print("Warning: Worker process did not terminate in time")
         process.terminate()
     else:
-        print(f"Main: Worker completed. Worker's weight sum: {model_state['weight_sum']:.4f}")
+        print(
+            f"Main: Worker completed. Worker's weight sum: {model_state['weight_sum']:.4f}"
+        )
         print(f"✓ Weight synchronization successful!")
 
 
 def example_shared_memory_sync():
     """Example 2: Shared memory weight synchronization."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Example 2: Shared Memory Weight Synchronization")
-    print("="*70)
-
+    print("=" * 70)
+
     # Create a simple policy
     policy = nn.Linear(4, 2)
-
+
     # Create shared memory scheme with auto-registration
     scheme = SharedMemWeightSyncScheme(strategy="tensordict", auto_register=True)
     sender = scheme.create_sender()
-
+
     # Create pipe for lazy registration
     parent_pipe, child_pipe = mp.Pipe()
     sender.register_worker(worker_idx=0, pipe_or_context=parent_pipe)
-
+
     # Start worker process
     manager = mp.Manager()
     model_state = manager.dict()
-    process = mp.Process(target=worker_process_shared_mem, args=(child_pipe, model_state))
+    process = mp.Process(
+        target=worker_process_shared_mem, args=(child_pipe, model_state)
    )
     process.start()
-
+
     # Send weights (automatically creates shared buffer on first send)
     weights_td = TensorDict.from_module(policy)
     with torch.no_grad():
         weights_td["weight"].fill_(2.0)
         weights_td["bias"].fill_(1.0)
-
+
     print(f"Main: Sending weights via shared memory...")
     sender.update_weights(weights_td)
-
+
     # Workers automatically see updates via shared memory!
     print("Main: Weights are now in shared memory, workers can access them")
-
+
     # Wait for worker to complete
     process.join(timeout=10.0)
-
+
     if process.is_alive():
         print("Warning: Worker process did not terminate in time")
         process.terminate()
     else:
-        print(f"Main: Worker completed. Worker's weight sum: {model_state['weight_sum']:.4f}")
+        print(
+            f"Main: Worker completed. Worker's weight sum: {model_state['weight_sum']:.4f}"
+        )
         print(f"✓ Shared memory synchronization successful!")
 
 
 def main():
     """Run all examples."""
-    print("\n" + "="*70)
+    print("\n" + "=" * 70)
     print("Weight Synchronization Schemes - Standalone Usage Examples")
-    print("="*70)
-
+    print("=" * 70)
+
     # Set multiprocessing start method
     try:
-        mp.set_start_method('spawn')
+        mp.set_start_method("spawn")
     except RuntimeError:
         pass  # Already set
-
+
     # Run examples
     example_multiprocess_sync()
     example_shared_memory_sync()
-
-    print("\n" + "="*70)
+
+    print("\n" + "=" * 70)
     print("All examples completed successfully!")
-    print("="*70 + "\n")
+    print("=" * 70 + "\n")
 
 
 if __name__ == "__main__":
     main()
-
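For quick reference, the sender/receiver handshake exercised by example_multiprocess_sync above condenses to the call order below. This is a sketch, not part of the commit: prints and verification are stripped, only calls visible in the diff are used (including the private receiver._transport.receive_weights), and applying the received state_dict to the model is elided because the hunks above do not show that step.

# Condensed sender/receiver handshake, assuming the torchrl.weight_update APIs above.
import torch.nn as nn
from torch import multiprocessing as mp
from torchrl.weight_update import MultiProcessWeightSyncScheme


def worker(child_pipe):
    # Worker side: bind a receiver to the local model and the pipe transport.
    scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
    policy = nn.Linear(4, 2)
    receiver = scheme.create_receiver()
    receiver.register_model(policy)
    receiver.register_worker_transport(child_pipe)
    result = receiver._transport.receive_weights(timeout=5.0)
    # Applying `result` to `policy` is elided here, as in the hunks above.


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    # Main-process side: one sender with the worker's pipe registered.
    scheme = MultiProcessWeightSyncScheme(strategy="state_dict")
    sender = scheme.create_sender()
    parent_pipe, child_pipe = mp.Pipe()
    sender.register_worker(worker_idx=0, pipe_or_context=parent_pipe)
    process = mp.Process(target=worker, args=(child_pipe,))
    process.start()
    sender.update_weights(nn.Linear(4, 2).state_dict())  # send a state_dict
    process.join(timeout=10.0)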