119 changes: 69 additions & 50 deletions scripts/demos/pick_and_place.py
@@ -11,6 +11,7 @@

# add argparse arguments
parser = argparse.ArgumentParser(description="Keyboard control for Isaac Lab Pick and Place.")
parser.add_argument("--num_envs", type=int, default=32, help="Number of environments to spawn.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
@@ -59,11 +60,22 @@ class PickAndPlaceEnvCfg(DirectRLEnvCfg):
action_space = 4
observation_space = 6
state_space = 0
device = "cpu"

# Simulation cfg. Note that we are forcing the simulation to run on CPU.
# This is because the surface gripper API is only supported on CPU backend for now.
sim: SimulationCfg = SimulationCfg(dt=1 / 60, render_interval=decimation, device="cpu")
# Simulation cfg. We run physics on GPU but enable CPU readback so that
# data is automatically available on CPU for the task/policy.
# sim_device is where physics runs (cuda for performance)
# task_device is where data buffers are allocated (cpu for convenience)
sim: SimulationCfg = SimulationCfg(
dt=1 / 60,
device=args_cli.device, # Physics simulation runs on input device (GPU by default)
render_interval=decimation,
use_fabric=True,
enable_scene_query_support=True,
enable_cpu_readback=True, # Data automatically copied to CPU
)
# Task device - where tensor operations and data buffers live
# This should match where the simulation data is returned (CPU when enable_cpu_readback=True)
device: str = "cpu"
debug_vis = True

# robot
@@ -136,8 +148,8 @@ def __init__(self, cfg: PickAndPlaceEnvCfg, render_mode: str | None = None, **kw
self.joint_vel = self.pick_and_place.data.joint_vel

# Buffers
self.go_to_cube = False
self.go_to_target = False
self.go_to_cube = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
self.go_to_target = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)
self.target_pos = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
self.instant_controls = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float32)
self.permanent_controls = torch.zeros((self.num_envs, 1), device=self.device, dtype=torch.float32)
@@ -173,35 +185,36 @@ def set_up_keyboard(self):
print("Keyboard set up!")
print("The simulation is ready for you to try it out!")
print("Your goal is pick up the purple cube and to drop it on the red sphere!")
print("Use the following controls to interact with the simulation:")
print("Press the 'A' key to have the gripper track the cube position.")
print("Press the 'D' key to have the gripper track the target position")
print("Press the 'W' or 'S' keys to move the gantry UP or DOWN respectively")
print("Press 'Q' or 'E' to OPEN or CLOSE the gripper respectively")
print(f"Number of environments: {self.num_envs}")
print("Use the following controls to interact with ALL environments simultaneously:")
print("Press the 'A' key to have all grippers track the cube position.")
print("Press the 'D' key to have all grippers track the target position")
print("Press the 'W' or 'S' keys to move all gantries UP or DOWN respectively")
print("Press 'Q' or 'E' to OPEN or CLOSE all grippers respectively")

def _on_keyboard_event(self, event):
"""Checks for a keyboard event and assign the corresponding command control depending on key pressed."""
if event.type == carb.input.KeyboardEventType.KEY_PRESS:
# Logic on key press
# Logic on key press - apply to ALL environments
if event.input.name == self._auto_aim_target:
self.go_to_target = True
self.go_to_cube = False
self.go_to_target[:] = True
self.go_to_cube[:] = False
if event.input.name == self._auto_aim_cube:
self.go_to_cube = True
self.go_to_target = False
self.go_to_cube[:] = True
self.go_to_target[:] = False
if event.input.name in self._instant_key_controls:
self.go_to_cube = False
self.go_to_target = False
self.instant_controls[0] = self._instant_key_controls[event.input.name]
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.instant_controls[:] = self._instant_key_controls[event.input.name]
if event.input.name in self._permanent_key_controls:
self.go_to_cube = False
self.go_to_target = False
self.permanent_controls[0] = self._permanent_key_controls[event.input.name]
# On key release, the robot stops moving
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.permanent_controls[:] = self._permanent_key_controls[event.input.name]
# On key release, all robots stop moving
elif event.type == carb.input.KeyboardEventType.KEY_RELEASE:
self.go_to_cube = False
self.go_to_target = False
self.instant_controls[0] = self._instant_key_controls["ZEROS"]
self.go_to_cube[:] = False
self.go_to_target[:] = False
self.instant_controls[:] = self._instant_key_controls["ZEROS"]

def _setup_scene(self):
self.pick_and_place = Articulation(self.cfg.robot_cfg)
@@ -225,28 +238,31 @@ def _pre_physics_step(self, actions: torch.Tensor) -> None:

def _apply_action(self) -> None:
# We use the keyboard outputs as an action.
if self.go_to_cube:
# Effort based proportional controller to track the cube position
head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[:, 0] - self.scene.env_origins[:, 0]
cube_pos_y = self.cube.data.root_pos_w[:, 1] - self.scene.env_origins[:, 1]
d_cube_robot_x = cube_pos_x - head_pos_x
d_cube_robot_y = cube_pos_y - head_pos_y
self.instant_controls[0] = torch.tensor(
[d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
)
elif self.go_to_target:
# Effort based proportional controller to track the target position
head_pos_x = self.pick_and_place.data.joint_pos[:, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[:, self._y_dof_idx[0]]
target_pos_x = self.target_pos[:, 0]
target_pos_y = self.target_pos[:, 1]
d_target_robot_x = target_pos_x - head_pos_x
d_target_robot_y = target_pos_y - head_pos_y
self.instant_controls[0] = torch.tensor(
[d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
)
# Process each environment independently
for env_idx in range(self.num_envs):
if self.go_to_cube[env_idx]:
# Effort based proportional controller to track the cube position
head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[env_idx, 0] - self.scene.env_origins[env_idx, 0]
cube_pos_y = self.cube.data.root_pos_w[env_idx, 1] - self.scene.env_origins[env_idx, 1]
d_cube_robot_x = cube_pos_x - head_pos_x
d_cube_robot_y = cube_pos_y - head_pos_y
self.instant_controls[env_idx] = torch.tensor(
[d_cube_robot_x * 5.0, d_cube_robot_y * 5.0, 0.0], device=self.device
)
elif self.go_to_target[env_idx]:
# Effort based proportional controller to track the target position
head_pos_x = self.pick_and_place.data.joint_pos[env_idx, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[env_idx, self._y_dof_idx[0]]
target_pos_x = self.target_pos[env_idx, 0]
target_pos_y = self.target_pos[env_idx, 1]
d_target_robot_x = target_pos_x - head_pos_x
d_target_robot_y = target_pos_y - head_pos_y
self.instant_controls[env_idx] = torch.tensor(
[d_target_robot_x * 5.0, d_target_robot_y * 5.0, 0.0], device=self.device
)
Contributor comment on lines +251 to +263:

style: creating new tensors inside the for loop is inefficient

Each iteration creates a new tensor on the device. For better performance with vectorized environments, compute all controls using vectorized operations:

Suggested change (replacing the per-environment loop above with a vectorized version):
# Process all environments with vectorized operations
cube_mask = self.go_to_cube
target_mask = self.go_to_target
# Vectorized cube tracking
if cube_mask.any():
head_pos_x = self.pick_and_place.data.joint_pos[cube_mask, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[cube_mask, self._y_dof_idx[0]]
cube_pos_x = self.cube.data.root_pos_w[cube_mask, 0] - self.scene.env_origins[cube_mask, 0]
cube_pos_y = self.cube.data.root_pos_w[cube_mask, 1] - self.scene.env_origins[cube_mask, 1]
d_cube_robot = torch.stack([
(cube_pos_x - head_pos_x) * 5.0,
(cube_pos_y - head_pos_y) * 5.0,
torch.zeros_like(cube_pos_x)
], dim=-1)
self.instant_controls[cube_mask] = d_cube_robot
# Vectorized target tracking
if target_mask.any():
head_pos_x = self.pick_and_place.data.joint_pos[target_mask, self._x_dof_idx[0]]
head_pos_y = self.pick_and_place.data.joint_pos[target_mask, self._y_dof_idx[0]]
target_pos_x = self.target_pos[target_mask, 0]
target_pos_y = self.target_pos[target_mask, 1]
d_target_robot = torch.stack([
(target_pos_x - head_pos_x) * 5.0,
(target_pos_y - head_pos_y) * 5.0,
torch.zeros_like(target_pos_x)
], dim=-1)
self.instant_controls[target_mask] = d_target_robot


# Set the joint effort targets for the picker
self.pick_and_place.set_joint_effort_target(
self.instant_controls[:, 0].unsqueeze(dim=1), joint_ids=self._x_dof_idx
@@ -258,7 +274,7 @@ def _apply_action(self) -> None:
self.permanent_controls[:, 0].unsqueeze(dim=1), joint_ids=self._z_dof_idx
)
# Set the gripper command
self.gripper.set_grippers_command(self.instant_controls[:, 2].unsqueeze(dim=1))
self.gripper.set_grippers_command(self.instant_controls[:, 2])

def _get_observations(self) -> dict:
# Get the observations
@@ -397,8 +413,11 @@ def _debug_vis_callback(self, event):

def main():
"""Main function."""
# create environment configuration
env_cfg = PickAndPlaceEnvCfg()
env_cfg.scene.num_envs = args_cli.num_envs
# create environment
pick_and_place = PickAndPlaceEnv(PickAndPlaceEnvCfg())
pick_and_place = PickAndPlaceEnv(env_cfg)
obs, _ = pick_and_place.reset()
while simulation_app.is_running():
# check for selected robots
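The tracking logic above is an effort-based proportional (P) controller on the gantry's x/y joints. A minimal standalone sketch of the pattern (function and variable names are illustrative; the 5.0 gain matches the diff):

import torch

def p_effort_controls(head_xy: torch.Tensor, goal_xy: torch.Tensor, kp: float = 5.0) -> torch.Tensor:
    """Return (N, 3) controls: proportional x/y efforts plus a zero gripper channel."""
    err = goal_xy - head_xy                                 # (N, 2) position error
    zeros = torch.zeros(err.shape[0], 1, device=err.device)
    return torch.cat([kp * err, zeros], dim=-1)             # (N, 3) control buffer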
4 changes: 0 additions & 4 deletions scripts/reinforcement_learning/rl_games/play.py
@@ -95,10 +95,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# override configurations with non-hydra CLI arguments
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device

# randomly sample a seed if seed = -1
if args_cli.seed == -1:
5 changes: 0 additions & 5 deletions scripts/reinforcement_learning/rl_games/train.py
@@ -102,11 +102,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
"Please use GPU device (e.g., --device cuda) for distributed training."
)

# update agent device to match simulation device
if args_cli.device is not None:
agent_cfg["params"]["config"]["device"] = args_cli.device
agent_cfg["params"]["config"]["device_name"] = args_cli.device

# randomly sample a seed if seed = -1
if args_cli.seed == -1:
args_cli.seed = random.randint(0, 10000)
2 changes: 1 addition & 1 deletion scripts/reinforcement_learning/rsl_rl/train.py
@@ -182,7 +182,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions, rl_device=agent_cfg.device)

# create runner from rsl-rl
if agent_cfg.class_name == "OnPolicyRunner":
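The new rl_device argument lets the wrapper bridge the environment device and the RL training device. A sketch of the assumed semantics, per the changelog entry on device transfers (not the actual wrapper implementation):

import torch

env_device, rl_device = "cpu", "cuda:0"         # illustrative devices
obs = torch.zeros(32, 6, device=env_device)     # observations come back on the env device
policy_obs = obs.to(rl_device)                  # moved to the RL device for inference
actions = torch.zeros(32, 4, device=rl_device)  # policy output on the RL device
env_actions = actions.to(env_device)            # moved back to the env device for stepping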
2 changes: 1 addition & 1 deletion source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.47.10"
version = "0.48.0"

# Description
title = "Isaac Lab framework for Robot Learning"
49 changes: 49 additions & 0 deletions source/isaaclab/docs/CHANGELOG.rst
@@ -2,6 +2,55 @@ Changelog
---------


0.48.0 (2025-11-07)
~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added ``enable_cpu_readback`` parameter to :class:`~isaaclab.sim.SimulationCfg` to control whether physics data
is automatically copied from GPU to CPU. When enabled with GPU simulation, this allows data to be returned on CPU
while physics runs on GPU (see the sketch after this list).
* Added ``device`` parameter to :class:`~isaaclab.scene.InteractiveScene` to explicitly specify device for scene
tensor allocation, enabling proper device separation between simulation and task/environment.
* Added ``device`` configuration field to :class:`~isaaclab.envs.DirectRLEnvCfg`,
:class:`~isaaclab.envs.DirectMARLEnvCfg`, and :class:`~isaaclab.envs.ManagerBasedEnvCfg` to allow explicit
control of task device independent from simulation device.
* Added simulation device information to environment initialization print output for better visibility of the
three-layer device architecture (simulation device, environment device, training device).
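
A minimal sketch of the configuration these changes enable (values illustrative):

.. code-block:: python

    from isaaclab.sim import SimulationCfg

    # Physics steps on the GPU; data is read back to the CPU for the task/policy.
    sim_cfg = SimulationCfg(device="cuda:0", enable_cpu_readback=True)
    # The environment/task device then defaults to "cpu" (or set ``cfg.device`` explicitly).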

Changed
^^^^^^^

* Modified :class:`~isaaclab.assets.SurfaceGripper` to support GPU simulation with CPU readback. Now validates
that either simulation runs on CPU or ``enable_cpu_readback=True`` is set for GPU simulation.
* Updated all environment classes (:class:`~isaaclab.envs.DirectRLEnv`, :class:`~isaaclab.envs.DirectMARLEnv`,
:class:`~isaaclab.envs.ManagerBasedEnv`) to pass task device to :class:`~isaaclab.scene.InteractiveScene`
for proper device initialization.
* Updated RL training scripts (RSL-RL, RL-Games, skrl, Stable-Baselines3) to decouple simulation device (``--device`` flag)
from RL training device. RL training device now uses configuration defaults unless in distributed mode.
* Enhanced RL library wrappers (:class:`~isaaclab_rl.rsl_rl.RslRlVecEnvWrapper`,
:class:`~isaaclab_rl.rl_games.RlGamesVecEnvWrapper`) to properly handle device transfers between environment
device and RL training device.

Fixed
^^^^^

* Fixed device mismatch issues when using ``enable_cpu_readback=True`` by ensuring ``scene.env_origins`` and
other scene tensors are allocated on the correct task device.
* Fixed RL-Games wrapper to properly transfer observations from environment device to RL device in addition
to existing action transfers.
* Fixed environment buffers (``reset_buf``, ``episode_length_buf``) in :class:`~isaaclab.envs.DirectRLEnv`,
:class:`~isaaclab.envs.DirectMARLEnv`, and :class:`~isaaclab.envs.ManagerBasedRLEnv` to be allocated on
environment device instead of simulation device.
* Fixed environment device property in all environment classes to automatically default to CPU when
``enable_cpu_readback=True`` is set, ensuring ``env_ids`` and other environment buffers are created on
the correct device without requiring explicit ``device`` configuration.
* Fixed ``episode_length_buf`` initialization in :class:`~isaaclab.envs.ManagerBasedRLEnv` to respect
``enable_cpu_readback`` setting, preventing device mismatch errors in termination manager when using
CPU readback with GPU simulation.


0.47.10 (2025-11-06)
~~~~~~~~~~~~~~~~~~~~

@@ -42,8 +42,10 @@ class SurfaceGripper(AssetBase):
function is called automatically for every simulation step, and does not need to be called by the user.

Note:
The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
Use `--device cpu` to run the simulation on CPU.
The SurfaceGripper requires data on CPU. You can either:

1. Run simulation on CPU: ``sim.device='cpu'``
2. Run simulation on GPU with CPU readback: ``sim.device='cuda:0'`` and ``sim.enable_cpu_readback=True``
"""

def __init__(self, cfg: SurfaceGripperCfg):
@@ -243,22 +245,32 @@ def _initialize_impl(self) -> None:
"""Initializes the gripper-related handles and internal buffers.

Raises:
ValueError: If the simulation backend is not CPU.
ValueError: If GPU simulation is used without CPU readback enabled.
RuntimeError: If the Simulation Context is not initialized or if gripper prims are not found.

Note:
The SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU.
Use `--device cpu` to run the simulation on CPU.
The SurfaceGripper requires data on CPU. When using GPU physics (``sim.device='cuda:0'``),
you must enable CPU readback (``sim.enable_cpu_readback=True``) so that data is automatically
copied to CPU.
"""

enable_extension("isaacsim.robot.surface_gripper")
from isaacsim.robot.surface_gripper import GripperView

# Check that we are using the CPU backend.
if self._device != "cpu":
raise Exception(
"SurfaceGripper is only supported on CPU for now. Please set the simulation backend to run on CPU. Use"
" `--device cpu` to run the simulation on CPU."
# Check that if GPU simulation is used, CPU readback must be enabled
# SurfaceGripper needs data on CPU, so either:
# 1. Simulation on CPU (self._device == "cpu"), or
# 2. Simulation on GPU with enable_cpu_readback=True (data returned on CPU)
sim_device = sim_utils.SimulationContext.instance().cfg.device
enable_cpu_readback = sim_utils.SimulationContext.instance().cfg.enable_cpu_readback

if "cuda" in sim_device.lower() and not enable_cpu_readback:
raise ValueError(
f"SurfaceGripper requires data on CPU. Current configuration has simulation device '{sim_device}' "
f"with enable_cpu_readback={enable_cpu_readback}. "
"Please either:\n"
" 1. Set sim.device='cpu', or\n"
" 2. Set sim.enable_cpu_readback=True to run GPU physics with CPU data readback."
)
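# For reference, how this check treats the possible configurations (illustrative walk-through):
#   sim.device='cpu'                                -> accepted (data already on CPU)
#   sim.device='cuda:0', enable_cpu_readback=True   -> accepted (data copied to CPU)
#   sim.device='cuda:0', enable_cpu_readback=False  -> raises the ValueError above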

# obtain the first prim in the regex expression (all others are assumed to be a copy of this)
19 changes: 16 additions & 3 deletions source/isaaclab/isaaclab/envs/direct_marl_env.py
@@ -103,6 +103,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar

# print useful information
print("[INFO]: Base environment:")
print(f"\tSimulation device : {self.sim.device}")
print(f"\tEnvironment device : {self.device}")
print(f"\tEnvironment seed : {self.cfg.seed}")
print(f"\tPhysics step-size : {self.physics_dt}")
@@ -121,7 +122,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
# set the stage context for scene creation steps which use the stage
with use_stage(self.sim.get_initial_stage()):
self.scene = InteractiveScene(self.cfg.scene)
self.scene = InteractiveScene(self.cfg.scene, device=self.device)
self._setup_scene()
attach_stage_to_usd_context()
print("[INFO]: Scene manager: ", self.scene)
@@ -184,7 +185,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
self.common_step_counter = 0
# -- init buffers
self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.device)

# setup the observation, state and action spaces
self._configure_env_spaces()
@@ -266,7 +267,19 @@ def step_dt(self) -> float:

@property
def device(self):
"""The device on which the environment is running."""
"""The device on which the task computations are performed.

This can be different from :attr:`sim.device` when using CPU readback.
For example, physics can run on GPU while task buffers are on CPU.
"""
# If device is explicitly set in config, use that
if hasattr(self.cfg, "device") and self.cfg.device is not None:
return self.cfg.device
# If CPU readback is enabled, default to CPU for environment device
# since simulation data will be automatically copied to CPU
if self.cfg.sim.enable_cpu_readback:
return "cpu"
# Otherwise fall back to simulation device
return self.sim.device
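# Illustrative resolution examples (hypothetical configurations):
#   cfg.device = "cuda:0"                              -> env device "cuda:0" (explicit setting wins)
#   cfg.device = None, sim.enable_cpu_readback = True  -> env device "cpu"
#   cfg.device = None, sim.enable_cpu_readback = False -> env device = sim.device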

@property