119 changes: 69 additions & 50 deletions scripts/demos/pick_and_place.py
@@ -11,6 +11,7 @@

# add argparse arguments
parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.")
parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
@@ -59,11 +60,22 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg):
action_space = 4
observation_space = 6
state_space = 0
device = "cpu"

# Simulation cfg. Note that we are forcing the simulation to run on CPU.
# This is because the surface gripper API is only supported on CPU backend for now.
sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu")
# Simulation cfg. We run physics on GPU but enable CPU readback so that
# data is automatically available on CPU for the task/policy.
# sim_device is where physics runs (cuda for performance)
# task_device is where data buffers are allocated (cpu for convenience)
sim: SimulationCfg = SimulationCfg(
dt=1 / 60,
device=args_cli.device, # Physics simulation runs on input device (GPU by default)
render_interval=decimation,
use_fabric=True,
enable_scene_query_support=True,
enable_cpu_readback=True, # Data automatically copied to CPU
)
# Task device - where tensor operations and data buffers live
# This should match where the simulation data is returned (CPU when enable_cpu_readback=True)
device: str = "cpu"
debug_vis = True

# robot
@@ -136,8 +148,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw
self.joint_vel = self.pick_and_place.data.joint_vel

# Buffers
self.go_to_cube = False
self.go_to_target = False
self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32)
@@ -173,35 +185,36 @@ def set_up_keyboard(self):
print("Keyboard set up!")
print("The simulation is ready for you to try it out!")
print("Your goal is pick up the purple cube and to drop it on the red sphere!")
print("Use the following controls to interact with the simulation:")
print("Press the 'A' key to have the gripper track the cube position.")
print("Press the 'D' key to have the gripper track the target position")
print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively")
print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively")
print(f"Number of environments: {self.num_envs}")
print("Use the following controls to interact with ALL environments simultaneously:")
print("Press the 'A' key to have all grippers track the cube position.")
print("Press the 'D' key to have all grippers track the target position")
print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively")
print("Press 'Q' or 'E' to OPEN or CLOSE all grippers respectively")

def _on_keyboard_event(self, event):
"""Checks for a keyboard event and assign the corresponding command control depending on key pressed."""
if event.type == carb.input.KeyboardEventType.KEY_PRESS:
# Logic on key press
# Logic on key press - apply to ALL environments
if event.input.name == self._auto_aim_target:
self.go_to_target = True
self.go_to_cube = False
self.go_to_target[:] = True
self.go_to_cube[:] = False
if event.input.name == self._auto_aim_cube:
self.go_to_cube = True
self.go_to_target = False
self.go_to_cube[:] = True
self.go_to_target[:] = False
if event.input.name in self._instant_key_controls:
self.go_to_cube = False
self.go_to_target = False
self.instant_controls[0] = self._instant_key_controls[event.input.name]
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.instant_controls[:] = self._instant_key_controls[event.input.name]
if event.input.name in self._permanent_key_controls:
self.go_to_cube = False
self.go_to_target = False
self.permanent_controls[0] = self._permanent_key_controls[event.input.name]
# On key release, the robot stops moving
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.permanent_controls[:] = self._permanent_key_controls[event.input.name]
# On key release, all robots stop moving
elif event.type == carb.input.KeyboardEventType.KEY_RELEASE:
self.go_to_cube = False
self.go_to_target = False
self.instant_controls[0] = self._instant_key_controls["ZEROS"]
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.instant_controls[:] = self._instant_key_controls["ZEROS"]

def _setup_scene(self):
self.pick_and_place = Articulation(self.cfg.robot_cfg)
@@ -225,28 +238,31 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None:

def _apply_action(self) -> None:
# We use the keyboard outputs as an action.
if self.go_to_cube:
# Effort based proportional controller to track the cube position
head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0]
cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1]
d_cube_robot_x = cube_pos_x - head_pos_x
d_cube_robot_y = cube_pos_y - head_pos_y
self.instant_controls[0] = torch.tensor(
[d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
)
elif self.go_to_target:
# Effort based proportional controller to track the target position
head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
target_pos_x = self.target_pos[:, 0]
target_pos_y = self.target_pos[:, 1]
d_target_robot_x = target_pos_x - head_pos_x
d_target_robot_y = target_pos_y - head_pos_y
self.instant_controls[0] = torch.tensor(
[d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
)
# Process each environment independently
for env_idx in range(self.num_envs):
if self.go_to_cube[env_idx]:
# Effort based proportional controller to track the cube position
head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[env_idx, 0] - self.scene.env_origins[env_idx, 0]
cube_pos_y = self.cube.data.root_pos_w[env_idx, 1] - self.scene.env_origins[env_idx, 1]
d_cube_robot_x = cube_pos_x - head_pos_x
d_cube_robot_y = cube_pos_y - head_pos_y
self.instant_controls[env_idx] = torch.tensor(
[d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
)
elif self.go_to_target[env_idx]:
# Effort based proportional controller to track the target position
head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
target_pos_x = self.target_pos[env_idx, 0]
target_pos_y = self.target_pos[env_idx, 1]
d_target_robot_x = target_pos_x - head_pos_x
d_target_robot_y = target_pos_y - head_pos_y
self.instant_controls[env_idx] = torch.tensor(
[d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
)
Contributor comment on lines +251 to +263:

style: creating new tensors inside the for loop is inefficient

Each iteration creates a new tensor on the device. For better performance with vectorized environments, compute all controls using vectorized operations:

Suggested change (replacing the per-environment loop above with a vectorized version):
# Process all environments with vectorized operations
cube_mask = self.go_to_cube
target_mask = self.go_to_target
# Vectorized cube tracking
if cube_mask.any():
head_pos_x = self.pick_and_place.data.joint_pos[cube_mask, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[cube_mask, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[cube_mask, 0] - self.scene.env_origins[cube_mask, 0]
cube_pos_y = self.cube.data.root_pos_w[cube_mask, 1] - self.scene.env_origins[cube_mask, 1]
d_cube_robot = torch.stack([
(cube_pos_x - head_pos_x) * 5.0,
(cube_pos_y - head_pos_y) * 5.0,
torch.zeros_like(cube_pos_x)
], dim=-1)
self.instant_controls[cube_mask] = d_cube_robot
# Vectorized target tracking
if target_mask.any():
head_pos_x = self.pick_and_place.data.joint_pos[target_mask, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[target_mask, self._y_dof_idx[0]]
target_pos_x = self.target_pos[target_mask, 0]
target_pos_y = self.target_pos[target_mask, 1]
d_target_robot = torch.stack([
(target_pos_x - head_pos_x) * 5.0,
(target_pos_y - head_pos_y) * 5.0,
torch.zeros_like(target_pos_x)
], dim=-1)
self.instant_controls[target_mask] = d_target_robot


# Set the joint effort targets for the picker
self.pick_and_place.set_joint_effort_target(
self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx
@@ -258,7 +274,7 @@ def _apply_action(self) -> None:
self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx
)
# Set the gripper command
self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1))
self.gripper.set_grippers_command(self.instant_controls[:, 2])

def _get_observations(self) -> dict:
# Get the observations
@@ -397,8 +413,11 @@ def _debug_vis_callback(self, event):

def main():
"""Main function."""
# create environment configuration
env_cfg = PickAndPlaceEnvCfg()
env_cfg.scene.num_envs = args_cli.num_envs
# create environment
pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg())
pick_and_place = PickAndPlaceEnv(env_cfg)
obs, _ = pick_and_place.reset()
while simulation_app.is_running():
# check for selected robots
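The tracking logic above is an effort-based proportional (P) controller on the gantry's x/y joints. A minimal standalone sketch of the pattern (function and variable names are illustrative; the 5.0 gain matches the diff):

import torch

def p_effort_controls(head_xy: torch.Tensor, goal_xy: torch.Tensor, kp: float = 5.0) -> torch.Tensor:
    """Return (N, 3) controls: proportional x/y efforts plus a zero gripper channel."""
    err = goal_xy - head_xy                                 # (N, 2) position error
    zeros = torch.zeros(err.shape[0], 1, device=err.device)
    return torch.cat([kp * err, zeros], dim=-1)             # (N, 3) control buffer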
4 changes: 0 additions & 4 deletions scripts/reinforcement_learning/rl_games/play.py
@@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device

# randomly sample a seed if seed = -1
if args_cli.seed == -1:
5 changes: 0 additions & 5 deletions scripts/reinforcement_learning/rl_games/train.py
@@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
"Please use GPU device (e.g., --device cuda) for distributed training."
)

# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device

# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
2 changes: 1 addition & 1 deletion scripts/reinforcement_learning/rsl_rl/train.py
@@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device)

# create runner from rsl-rl
if agent_cfg.class_name == "OnPolicyRunner":
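The new rl_device argument lets the wrapper bridge the environment device and the RL training device. A sketch of the assumed semantics, per the changelog entry on device transfers (not the actual wrapper implementation):

import torch

env_device, rl_device = "cpu", "cuda:0"         # illustrative devices
obs = torch.zeros(32, 6, device=env_device)     # observations come back on the env device
policy_obs = obs.to(rl_device)                  # moved to the RL device for inference
actions = torch.zeros(32, 4, device=rl_device)  # policy output on the RL device
env_actions = actions.to(env_device)            # moved back to the env device for stepping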
2 changes: 1 addition & 1 deletion source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.47.10"
version = "0.48.0"

# Description
title = "Isaac Lab framework for Robot Learning"
49 changes: 49 additions & 0 deletions source/isaaclab/docs/CHANGELOG.rst
@@ -2,6 +2,55 @@ Changelog
---------


0.48.0 (2025-11-07)
~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added ``enable_cpu_readback`` parameter to :class:`~isaaclab.sim.SimulationCfg` to control whether physics data
is automatically copied from GPU to CPU. When enabled with GPU simulation, this allows data to be returned on CPU
while physics runs on GPU (see the sketch after this list).
* Added ``device`` parameter to :class:`~isaaclab.scene.InteractiveScene` to explicitly specify device for scene
tensor allocation, enabling proper device separation between simulation and task/environment.
* Added ``device`` configuration field to :class:`~isaaclab.envs.DirectRLEnvCfg`,
:class:`~isaaclab.envs.DirectMARLEnvCfg`, and :class:`~isaaclab.envs.ManagerBasedEnvCfg` to allow explicit
control of task device independent from simulation device.
* Added simulation device information to environment initialization print output for better visibility of the
three-layer device architecture (simulation device, environment device, training device).
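
A minimal sketch of the configuration these changes enable (values illustrative):

.. code-block:: python

    from isaaclab.sim import SimulationCfg

    # Physics steps on the GPU; data is read back to the CPU for the task/policy.
    sim_cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=True)
    # The environment/task device then defaults to "cpu" (or set ``cfg.device`` explicitly).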

Changed
^^^^^^^

* Modified :class:`~isaaclab.assets.SurfaceGripper` to support GPU simulation with CPU readback. Now validates
that either simulation runs on CPU or ``enable_cpu_readback=True`` is set for GPU simulation.
* Updated all environment classes (:class:`~isaaclab.envs.DirectRLEnv`, :class:`~isaaclab.envs.DirectMARLEnv`,
:class:`~isaaclab.envs.ManagerBasedEnv`) to pass task device to :class:`~isaaclab.scene.InteractiveScene`
for proper device initialization.
* Updated RL training scripts (RSL-RL, RL-Games, skrl, Stable-Baselines3) to decouple simulation device (``--device`` flag)
from RL training device. RL training device now uses configuration defaults unless in distributed mode.
* Enhanced RL library wrappers (:class:`~isaaclab_rl.rsl_rl.RslRlVecEnvWrapper`,
:class:`~isaaclab_rl.rl_games.RlGamesVecEnvWrapper`) to properly handle device transfers between environment
device and RL training device.

Fixed
^^^^^

* Fixed device mismatch issues when using ``enable_cpu_readback=True`` by ensuring ``scene.env_origins`` and
other scene tensors are allocated on the correct task device.
* Fixed RL-Games wrapper to properly transfer observations from environment device to RL device in addition
to existing action transfers.
* Fixed environment buffers (``reset_buf``, ``episode_length_buf``) in :class:`~isaaclab.envs.DirectRLEnv`,
:class:`~isaaclab.envs.DirectMARLEnv`, and :class:`~isaaclab.envs.ManagerBasedRLEnv` to be allocated on
environment device instead of simulation device.
* Fixed environment device property in all environment classes to automatically default to CPU when
``enable_cpu_readback=True`` is set, ensuring ``env_ids`` and other environment buffers are created on
the correct device without requiring explicit ``device`` configuration.
* Fixed ``episode_length_buf`` initialization in :class:`~isaaclab.envs.ManagerBasedRLEnv` to respect
``enable_cpu_readback`` setting, preventing device mismatch errors in termination manager when using
CPU readback with GPU simulation.


0.47.10 (2025-11-06)
~~~~~~~~~~~~~~~~~~~~

@@ -42,8 +42,10 @@ class SurfaceGripper(AssetBase):
function is called automatically for every simulation step, and does not need to be called by the user.

Note:
The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
Use `--device cpu` to run the simulation on CPU.
The SurfaceGripper requires data on CPU. You can either:

1. Run simulation on CPU: ``sim.device='cpu'``
2. Run simulation on GPU with CPU readback: ``sim.device='cuda:0'`` and ``sim.enable_cpu_readback=True``
"""

def __init__(self, cfg: SurfaceGripperCfg):
@@ -243,22 +245,32 @@ def _initialize_impl(self) -> None:
"""Initializes the gripper-related handles and internal buffers.

Raises:
ValueError: If the simulation backend is not CPU.
ValueError: If GPU simulation is used without CPU readback enabled.
RuntimeError: If the Simulation Context is not initialized or if gripper prims are not found.

Note:
The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
Use `--device cpu` to run the simulation on CPU.
The SurfaceGripper requires data on CPU. When using GPU physics (``sim.device='cuda:0'``),
you must enable CPU readback (``sim.enable_cpu_readback=True``) so that data is automatically
copied to CPU.
"""

enable_extension("isaacsim.robot.surface_gripper")
from isaacsim.robot.surface_gripper import GripperView

# Check that we are using the CPU backend.
if self._device != "cpu":
raise Exception(
"SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. Use"
" `--device cpu` to run the simulation on CPU."
# Check that if GPU simulation is used, CPU readback must be enabled
# SurfaceGripper needs data on CPU, so either:
# 1. Simulation on CPU (self._device == "cpu"), or
# 2. Simulation on GPU with enable_cpu_readback=True (data returned on CPU)
sim_device = sim_utils.SimulationContext.instance().cfg.device
enable_cpu_readback = sim_utils.SimulationContext.instance().cfg.enable_cpu_readback

if "cuda" in sim_device.lower() and not enable_cpu_readback:
raise ValueError(
f"SurfaceGripper requires data on CPU. Current configuration has simulation device '{sim_device}' "
f"with enable_cpu_readback={enable_cpu_readback}. "
"Please either:\n"
" 1. Set sim.device='cpu', or\n"
" 2. Set sim.enable_cpu_readback=True to run GPU physics with CPU data readback."
)
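# For reference, how this check treats the possible configurations (illustrative walk-through):
#   sim.device='cpu'                                -> accepted (data already on CPU)
#   sim.device='cuda:0', enable_cpu_readback=True   -> accepted (data copied to CPU)
#   sim.device='cuda:0', enable_cpu_readback=False  -> raises the ValueError above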

# obtain the first prim in the regex expression (all others are assumed to be a copy of this)
19 changes: 16 additions & 3 deletions source/isaaclab/isaaclab/envs/direct_marl_env.py
@@ -103,6 +103,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar

# print useful information
print("[INFO]: Base environment:")
print(f"\tSimulation device : {self.sim.device}")
print(f"\tEnvironment device : {self.device}")
print(f"\tEnvironment seed : {self.cfg.seed}")
print(f"\tPhysics step-size : {self.physics_dt}")
@@ -121,7 +122,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
# set the stage context for scene creation steps which use the stage
with use_stage(self.sim.get_initial_stage()):
self.scene = InteractiveScene(self.cfg.scene)
self.scene = InteractiveScene(self.cfg.scene, device=self.device)
self._setup_scene()
attach_stage_to_usd_context()
print("[INFO]: Scene manager: ", self.scene)
@@ -184,7 +185,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
self.common_step_counter = 0
# -- init buffers
self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)

# setup the observation, state and action spaces
self._configure_env_spaces()
@@ -266,7 +267,19 @@ def step_dt(self) -> float:

@property
def device(self):
"""The device on which the environment is running."""
"""The device on which the task computations are performed.

This can be different from :attr:`sim.device` when using CPU readback.
For example, physics can run on GPU while task buffers are on CPU.
"""
# If device is explicitly set in config, use that
if hasattr(self.cfg, "device") and self.cfg.device is not None:
return self.cfg.device
# If CPU readback is enabled, default to CPU for environment device
# since simulation data will be automatically copied to CPU
if self.cfg.sim.enable_cpu_readback:
return "cpu"
# Otherwise fall back to simulation device
return self.sim.device
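# Illustrative resolution examples (hypothetical configurations):
#   cfg.device = "cuda:0"                              -> env device "cuda:0" (explicit setting wins)
#   cfg.device = None, sim.enable_cpu_readback = True  -> env device "cpu"
#   cfg.device = None, sim.enable_cpu_readback = False -> env device = sim.device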

@property