def _prepend_import_path(path: Path) -> None:
    """Put ``path`` at the front of ``sys.path`` unless it is already listed."""
    entry = str(path)
    if entry in sys.path:
        return
    sys.path.insert(0, entry)


def _install_import_paths(drakeuni_src: Path | None) -> None:
    """Make the repository sources, and optionally a DrakeUni checkout, importable.

    ``SRC_DIR`` and then ``ROOT_DIR`` are always prepended. ``drakeuni_src`` is
    prepended last, and only when it is given and exists on disk.
    """
    _prepend_import_path(SRC_DIR)
    _prepend_import_path(ROOT_DIR)
    if drakeuni_src is None or not drakeuni_src.exists():
        return
    _prepend_import_path(drakeuni_src)
class StepProfiler:
    """Accumulate per-step wall time for monkeypatched task/backend methods.

    Typical use per env step: call ``begin()``, run the step, then ``end()`` to
    collect a ``{metric_key: milliseconds}`` mapping. Methods are instrumented
    with ``wrap``. Time observed outside a ``begin()``/``end()`` window is
    discarded.
    """

    def __init__(self) -> None:
        # ``None`` means no sample is open; ``add`` is a no-op in that state.
        self._current: dict[str, float] | None = None
        # Nesting depth of in-flight wrapped calls, tracked per metric key.
        self._active: dict[str, int] = {}

    def begin(self) -> None:
        """Open a fresh, empty sample."""
        self._current = {}

    def end(self) -> dict[str, float]:
        """Close the current sample and return it (empty if none was open)."""
        current = self._current or {}
        self._current = None
        return current

    def add(self, key: str, seconds: float) -> None:
        """Add ``seconds`` (stored as milliseconds) to ``key`` in the open sample."""
        if self._current is None:
            return
        self._current[key] = self._current.get(key, 0.0) + seconds * 1000.0

    def wrap(self, obj: Any, name: str, key: str) -> None:
        """Replace ``obj.name`` with a timing wrapper that reports under ``key``.

        Missing attributes and attributes that are already wrapped are left
        untouched. When wrapped calls that share the same ``key`` nest (for
        example a task-level query that delegates to a backend query), only the
        outermost call is recorded, so the metric is not counted twice. Calls
        under different keys are each recorded with their own inclusive time.
        """
        import functools

        if not hasattr(obj, name):
            return
        original = getattr(obj, name)
        if getattr(original, "_unilab_benchmark_wrapped", False):
            return

        @functools.wraps(original)
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            depth = self._active.get(key, 0)
            self._active[key] = depth + 1
            t0 = time.perf_counter()
            try:
                return original(*args, **kwargs)
            finally:
                elapsed = time.perf_counter() - t0
                # Restore the depth even when the wrapped call raises.
                self._active[key] = depth
                if depth == 0:
                    self.add(key, elapsed)

        setattr(wrapped, "_unilab_benchmark_wrapped", True)
        setattr(obj, name, wrapped)
def _compose_env_cfg(task: str, backend: str, spec: TaskSpec) -> Any:
    """Build the env config for ``task`` on ``backend`` from the PPO Hydra tree.

    Global Hydra state is cleared, ``conf/ppo`` is composed with the
    ``task=<task>/<backend>`` override, and the env-config override produced by
    ``BackendAdapter`` is applied to a fresh config from ``spec``.
    """
    from hydra import compose, initialize_config_dir
    from hydra.core.global_hydra import GlobalHydra

    from unilab.base.registry import apply_cfg_overrides
    from unilab.training import BackendAdapter

    hydra_overrides = [
        f"task={task}/{backend}",
        "hydra.run.dir=.",
        "hydra.output_subdir=null",
        "hydra/job_logging=disabled",
        "hydra/hydra_logging=disabled",
    ]
    config_dir = str(ROOT_DIR / "conf" / "ppo")

    GlobalHydra.instance().clear()
    with initialize_config_dir(config_dir=config_dir, version_base="1.3"):
        owner_cfg = compose(config_name="config", overrides=hydra_overrides)

    adapter = BackendAdapter(owner_cfg, root_dir=ROOT_DIR, algo_name="ppo")
    env_cfg_override = adapter.build_task_env_cfg_override()
    env_cfg = spec.env_cfg_factory()
    apply_cfg_overrides(env_cfg, env_cfg_override)
    return env_cfg


def _make_env(
    *,
    task: str,
    backend: str,
    num_envs: int,
    nthread: int,
    spec: TaskSpec,
) -> Any:
    """Instantiate the task env; ``nthread`` is applied only for the drake backend."""
    env_cfg = _compose_env_cfg(task, backend, spec)
    if backend == "drake":
        env_cfg.drake_nthread = int(nthread)
    return spec.env_cls_factory()(env_cfg, num_envs=num_envs, backend_type=backend)
def _actions(env: Any, mode: str, rng: np.random.Generator) -> np.ndarray:
    """Build one ``(num_envs, flattened_action_dim)`` float32 action batch.

    ``mode`` selects all-zero actions, uniform samples in ``[-1, 1)``, or
    normal samples with standard deviation 0.05.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    batch_shape = (env.num_envs, int(np.prod(env.action_space.shape)))
    if mode == "zeros":
        batch = np.zeros(batch_shape, dtype=np.float32)
    elif mode == "random":
        batch = rng.uniform(-1.0, 1.0, size=batch_shape).astype(np.float32)
    elif mode == "small-random":
        batch = rng.normal(0.0, 0.05, size=batch_shape).astype(np.float32)
    else:
        raise ValueError(f"Unknown action mode: {mode}")
    return batch


def _mean(values: Sequence[float]) -> float:
    """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
    if not values:
        return 0.0
    return float(statistics.fmean(values))


def _median(values: Sequence[float]) -> float:
    """Return the median of ``values``, or 0.0 when it is empty."""
    if not values:
        return 0.0
    return float(statistics.median(values))


def _summarize(samples: list[dict[str, float]]) -> tuple[dict[str, float], dict[str, float]]:
    """Reduce per-step samples to ``(median, mean)`` dictionaries keyed by metric.

    A metric missing from a sample counts as 0.0 for that sample. Keys are
    emitted in sorted order.
    """
    metric_names = sorted({key for sample in samples for key in sample})
    median: dict[str, float] = {}
    mean: dict[str, float] = {}
    for metric in metric_names:
        column = [sample.get(metric, 0.0) for sample in samples]
        median[metric] = _median(column)
        mean[metric] = _mean(column)
    return median, mean
def _parse_csv(text: str) -> list[str]:
    """Split ``text`` on commas, strip each part, and drop empty parts.

    Raises:
        ValueError: If no non-empty value remains.
    """
    values = list(filter(None, map(str.strip, text.split(","))))
    if values:
        return values
    raise ValueError(f"Expected at least one value in {text!r}")


def _parse_int_csv(text: str) -> list[int]:
    """Parse a comma-separated list of integers (see ``_parse_csv``)."""
    return list(map(int, _parse_csv(text)))
def main() -> None:
    """Run the task/backend/env-count/thread sweep and write the JSON report.

    Every requested task id is validated before the first benchmark starts, so
    a typo in a later task cannot discard the results of a long sweep.

    Raises:
        ValueError: If a requested task id is unknown or a list argument is empty.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--tasks",
        default="go1_joystick_flat,go2_joystick_flat",
        help=(
            "Comma-separated task ids. Defaults stay within committed Drake task configs; "
            "pass g1_motion_tracking explicitly when its Drake config is available."
        ),
    )
    parser.add_argument("--backends", default="drake,mujoco", help="Comma-separated backends.")
    parser.add_argument("--num-envs", default="64,256,1024", help="Comma-separated env counts.")
    parser.add_argument("--nthreads", default="1,4,8,12,20", help="Comma-separated Drake threads.")
    parser.add_argument("--warmup-steps", type=int, default=3)
    parser.add_argument("--steps", type=int, default=10)
    parser.add_argument(
        "--action-mode",
        choices=("zeros", "small-random", "random"),
        default="zeros",
    )
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--out-json", type=Path, default=DEFAULT_OUTPUT)
    parser.add_argument("--drakeuni-src", type=Path, default=DEFAULT_DRAKEUNI_SRC)
    args = parser.parse_args()

    _install_import_paths(args.drakeuni_src)
    specs = _task_specs()
    task_ids = _parse_csv(args.tasks)
    backends = _parse_csv(args.backends)
    env_counts = _parse_int_csv(args.num_envs)
    nthreads = _parse_int_csv(args.nthreads)

    # Fail fast: reject unknown tasks before any (potentially long) run starts.
    unknown_tasks = [task for task in task_ids if task not in specs]
    if unknown_tasks:
        raise ValueError(f"Unknown task {unknown_tasks[0]!r}; available: {sorted(specs)}")

    records: list[BenchRecord] = []
    for task in task_ids:
        for num_envs in env_counts:
            for backend in backends:
                # Only the drake backend sweeps thread counts; others run once.
                thread_values = nthreads if backend == "drake" else [0]
                for nthread in thread_values:
                    print(
                        f"Running task={task} backend={backend} envs={num_envs} "
                        f"nthread={nthread if backend == 'drake' else 'auto'}",
                        flush=True,
                    )
                    records.append(
                        _benchmark_one(
                            task=task,
                            backend=backend,
                            num_envs=num_envs,
                            nthread=nthread,
                            warmup_steps=args.warmup_steps,
                            steps=args.steps,
                            action_mode=args.action_mode,
                            seed=args.seed,
                            spec=specs[task],
                        )
                    )

    _print_table(records)
    payload = {
        "records": [asdict(record) for record in records],
        "args": {
            "tasks": task_ids,
            "backends": backends,
            "num_envs": env_counts,
            "nthreads": nthreads,
            "warmup_steps": args.warmup_steps,
            "steps": args.steps,
            "action_mode": args.action_mode,
            "seed": args.seed,
        },
    }
    args.out_json.parent.mkdir(parents=True, exist_ok=True)
    args.out_json.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(f"Wrote {args.out_json}")
scale_gravity: 0.05 + scale_linvel: 0.1 + control_config: + action_scale: 0.3 + domain_rand: + randomize_floor_friction: false + randomize_link_mass: false + torso_added_mass_range: null + randomize_torso_com: false + randomize_dof_armature: false + randomize_reset_joint_qpos: true + reset_joint_qpos_range: [-0.05, 0.05] + +reward: + scales: + height: 2.0 + orientation: 2.0 + contact: -1.0 + action_rate: -0.01 + termination: -2.0 + dof_pos_limits: -0.5 + torques: 0.0 + pose: -0.1 + penalty_contact: -0.2 + tar: 0.8 + rear_feet_contact: 0.5 + rear_leg_symmetry: -0.2 + front_leg_motion: -0.05 + upright_stability: -0.2 + knee_clearance: -0.5 + stay_still: -0.1 + energy: -0.003 + dof_acc: -2.5e-7 + tracking_sigma: 0.25 + base_height_target: 0.3 + knee_height_target: 0.08 diff --git a/conf/offpolicy/task/sac/go2_joystick_flat/drake.yaml b/conf/offpolicy/task/sac/go2_joystick_flat/drake.yaml new file mode 100644 index 000000000..6829cfe93 --- /dev/null +++ b/conf/offpolicy/task/sac/go2_joystick_flat/drake.yaml @@ -0,0 +1,50 @@ +# @package _global_ +training: + task_name: Go2JoystickFlat + sim_backend: drake + no_play: true + play_steps: 400 + play_env_num: 1 + play_render_mode: record + +algo: + algo_log_name: fast_sac_drake + num_envs: 512 + batch_size: 1024 + replay_buffer_n: 512 + updates_per_step: 2 + learning_starts: 2 + max_iterations: 300 + save_interval: 100 + actor_hidden_dim: 256 + critic_hidden_dim: 512 + obs_normalization: true + use_layer_norm: true + algo_params: + alpha_init: 0.005 + target_entropy_ratio: 0.0 + use_compile: false + +env: + drake_backend_mode: batch + drake_nthread: 20 + scene: + model_file: src/unilab/assets/robots/go2/scene_flat.xml + domain_rand: + randomize_kp: false + randomize_kd: false + push_robots: false + +reward: + scales: + tracking_lin_vel: 1.0 + tracking_ang_vel: 0.2 + lin_vel_z: -5.0 + ang_vel_xy: -0.1 + base_height: -100.0 + action_rate: -0.005 + similar_to_default: -0.1 + contact: 0.24 + swing_feet_z: 4.0 + tracking_sigma: 
0.25 + base_height_target: 0.3 diff --git a/conf/offpolicy/task/sac/go2w_joystick_flat/drake.yaml b/conf/offpolicy/task/sac/go2w_joystick_flat/drake.yaml new file mode 100644 index 000000000..fa05273e4 --- /dev/null +++ b/conf/offpolicy/task/sac/go2w_joystick_flat/drake.yaml @@ -0,0 +1,63 @@ +# @package _global_ +training: + task_name: Go2WJoystickFlat + sim_backend: drake + no_play: true + play_steps: 400 + play_env_num: 1 + play_render_mode: record + +algo: + algo_log_name: fast_sac_drake + num_envs: 512 + batch_size: 1024 + replay_buffer_n: 512 + updates_per_step: 2 + learning_starts: 2 + max_iterations: 300 + save_interval: 100 + actor_hidden_dim: 256 + critic_hidden_dim: 512 + obs_normalization: true + use_layer_norm: true + algo_params: + alpha_init: 0.005 + target_entropy_ratio: 0.0 + use_compile: false + +env: + drake_backend_mode: batch + drake_nthread: 20 + scene: + model_file: src/unilab/assets/robots/go2w/scene_flat.xml + commands: + vel_limit: + - [0.0, 0.0, -1.0] + - [1.0, 0.0, 1.0] + control_config: + action_scale: 0.5 + wheel_action_scale: 10.0 + Kp: 50.0 + Kd: 1.5 + wheel_Kd: 0.5 + domain_rand: + randomize_kp: false + randomize_kd: false + push_robots: false + +reward: + scales: + tracking_lin_vel: 1.0 + tracking_ang_vel: 0.75 + lin_vel_z: -5.0 + ang_vel_xy: -0.1 + base_height: -100.0 + orientation: -2.0 + action_rate: -0.005 + similar_to_default: -0.5 + torques: -0.0002 + wheel_vel: 0.0 + alive: 0.5 + upward: 1.0 + tracking_sigma: 0.25 + base_height_target: 0.4 diff --git a/conf/offpolicy/task/sac/stewart_balance/drake.yaml b/conf/offpolicy/task/sac/stewart_balance/drake.yaml new file mode 100644 index 000000000..5a672e516 --- /dev/null +++ b/conf/offpolicy/task/sac/stewart_balance/drake.yaml @@ -0,0 +1,38 @@ +# @package _global_ +training: + task_name: StewartBalance + sim_backend: drake + no_play: true + play_steps: 400 + play_env_num: 1 + play_render_mode: record + render_spacing: 4.5 + +algo: + algo_log_name: fast_sac_drake + num_envs: 256 + 
batch_size: 512 + replay_buffer_n: 512 + updates_per_step: 2 + learning_starts: 2 + max_iterations: 400 + save_interval: 100 + actor_hidden_dim: 128 + critic_hidden_dim: 256 + obs_normalization: true + use_layer_norm: true + algo_params: + alpha_init: 0.005 + target_entropy_ratio: 0.0 + use_compile: false + +env: + drake_backend_mode: batch + drake_nthread: 20 + +reward: + scales: + center: 0.7 + progress: 0.6 + still: 3.0 + fall_penalty: -6.0 diff --git a/conf/ppo/task/go1_joystick_flat/drake.yaml b/conf/ppo/task/go1_joystick_flat/drake.yaml new file mode 100644 index 000000000..3c9f63c80 --- /dev/null +++ b/conf/ppo/task/go1_joystick_flat/drake.yaml @@ -0,0 +1,48 @@ +# @package _global_ +training: + task_name: Go1JoystickFlat + sim_backend: drake + play_steps: 500 + play_env_num: 16 + render_spacing: 0.0 + cam_tracking: true + cam_tracking_env_idx: 0 + cam_tracking_extra_envs: 9 + +interactive: + action_mode: policy + policy_obs_mode: auto + camera_follow_body: true + use_env_visual_model: false + +algo: + num_envs: 1024 + num_steps_per_env: 24 + max_iterations: 151 + save_interval: 100 + obs_groups: + actor: + - actor + +env: + drake_backend_mode: batch + drake_nthread: 0 + scene: + model_file: src/unilab/assets/robots/go1/scene_flat.xml + domain_rand: + randomize_base_mass: false + random_com: false + push_robots: false + +reward: + scales: + tracking_lin_vel: 1.0 + tracking_ang_vel: 0.2 + lin_vel_z: -5.0 + ang_vel_xy: -0.1 + base_height: -100.0 + action_rate: -0.005 + similar_to_default: -0.1 + swing_feet_z: 4.0 + tracking_sigma: 0.25 + base_height_target: 0.3 diff --git a/conf/ppo/task/go2_footstand/drake.yaml b/conf/ppo/task/go2_footstand/drake.yaml new file mode 100644 index 000000000..b8c00f148 --- /dev/null +++ b/conf/ppo/task/go2_footstand/drake.yaml @@ -0,0 +1,67 @@ +# @package _global_ +training: + task_name: Go2FootStand + sim_backend: drake + +env: + sim_dt: 0.004 + drake_backend_mode: batch + drake_nthread: 0 + add_body_sensors: true + 
obs_history_len: 15 + energy_termination_threshold: 200.0 + noise_config: + level: 1.0 + scale_joint_angle: 0.01 + scale_joint_vel: 1.5 + scale_gyro: 0.2 + scale_gravity: 0.05 + scale_linvel: 0.1 + control_config: + action_scale: 0.3 + domain_rand: + randomize_floor_friction: false + randomize_link_mass: false + torso_added_mass_range: null + randomize_torso_com: false + randomize_dof_armature: false + randomize_reset_joint_qpos: true + reset_joint_qpos_range: [-0.05, 0.05] + +algo: + empirical_normalization: true + num_envs: 1024 + max_iterations: 10000 + obs_groups: + actor: + - actor + critic: + - critic + policy: + init_noise_std: 0.5 + algorithm: + entropy_coef: 0.005 + +reward: + scales: + height: 2.0 + orientation: 2.0 + contact: -1.0 + action_rate: -0.01 + termination: -2.0 + dof_pos_limits: -0.5 + torques: 0.0 + pose: -0.1 + penalty_contact: -0.2 + tar: 0.8 + rear_feet_contact: 0.5 + rear_leg_symmetry: -0.2 + front_leg_motion: -0.05 + upright_stability: -0.2 + knee_clearance: -0.5 + stay_still: -0.1 + energy: -0.003 + dof_acc: -2.5e-7 + tracking_sigma: 0.25 + base_height_target: 0.3 + knee_height_target: 0.08 diff --git a/conf/ppo/task/go2_joystick_flat/drake.yaml b/conf/ppo/task/go2_joystick_flat/drake.yaml new file mode 100644 index 000000000..e246865f7 --- /dev/null +++ b/conf/ppo/task/go2_joystick_flat/drake.yaml @@ -0,0 +1,41 @@ +# @package _global_ +training: + task_name: Go2JoystickFlat + sim_backend: drake + +algo: + num_envs: 1024 + max_iterations: 151 + empirical_normalization: true + obs_groups: + actor: + - actor + policy: + init_noise_std: 0.5 + algorithm: + learning_rate: 3.0e-4 + entropy_coef: 1.0e-3 + +env: + drake_backend_mode: batch + drake_nthread: 0 + scene: + model_file: src/unilab/assets/robots/go2/scene_flat.xml + domain_rand: + randomize_kp: false + randomize_kd: false + push_robots: false + +reward: + scales: + tracking_lin_vel: 1.0 + tracking_ang_vel: 0.2 + lin_vel_z: -5.0 + ang_vel_xy: -0.1 + base_height: -100.0 + action_rate: 
-0.005 + similar_to_default: -0.1 + contact: 0.24 + swing_feet_z: 4.0 + tracking_sigma: 0.25 + base_height_target: 0.3 diff --git a/conf/ppo/task/go2w_joystick_flat/drake.yaml b/conf/ppo/task/go2w_joystick_flat/drake.yaml new file mode 100644 index 000000000..6a5b59ef5 --- /dev/null +++ b/conf/ppo/task/go2w_joystick_flat/drake.yaml @@ -0,0 +1,54 @@ +# @package _global_ +training: + task_name: Go2WJoystickFlat + sim_backend: drake + +algo: + num_envs: 1024 + max_iterations: 151 + empirical_normalization: true + obs_groups: + actor: + - actor + policy: + init_noise_std: 0.5 + algorithm: + learning_rate: 3.0e-4 + entropy_coef: 1.0e-3 + +env: + drake_backend_mode: batch + drake_nthread: 0 + scene: + model_file: src/unilab/assets/robots/go2w/scene_flat.xml + commands: + vel_limit: + - [0.0, 0.0, -1.0] + - [1.0, 0.0, 1.0] + control_config: + action_scale: 0.5 + wheel_action_scale: 10.0 + Kp: 50.0 + Kd: 1.5 + wheel_Kd: 0.5 + domain_rand: + randomize_kp: false + randomize_kd: false + push_robots: false + +reward: + scales: + tracking_lin_vel: 1.0 + tracking_ang_vel: 0.75 + lin_vel_z: -5.0 + ang_vel_xy: -0.1 + base_height: -100.0 + orientation: -2.0 + action_rate: -0.005 + similar_to_default: -0.5 + torques: -0.0002 + wheel_vel: 0.0 + alive: 0.5 + upward: 1.0 + tracking_sigma: 0.25 + base_height_target: 0.4 diff --git a/conf/ppo/task/stewart_balance/drake.yaml b/conf/ppo/task/stewart_balance/drake.yaml new file mode 100644 index 000000000..cdd440832 --- /dev/null +++ b/conf/ppo/task/stewart_balance/drake.yaml @@ -0,0 +1,51 @@ +# @package _global_ +training: + task_name: StewartBalance + sim_backend: drake + render_spacing: 4.5 + +env: + drake_backend_mode: batch + drake_nthread: 0 + +algo: + num_envs: 128 + num_steps_per_env: 64 + max_iterations: 400 + empirical_normalization: true + obs_groups: + actor: [policy] + critic: [policy] + actor: + class_name: rsl_rl.models.MLPModel + hidden_dims: [128, 128] + activation: tanh + obs_normalization: true + distribution_cfg: + 
def _load_drake_backend() -> Any:
    """Import the Drake backend module on demand and return ``DrakeBackend``."""
    from .drake.backend import DrakeBackend as backend_cls

    return backend_cls


def _drake_available() -> bool:
    """Return the availability flag reported by ``ensure_drake_batch_available``."""
    from .drake.backend import ensure_drake_batch_available as probe

    available, _error = probe()
    return available
backend_type: str, scene: SceneCfg, @@ -53,7 +65,7 @@ def create_backend( """Create a simulation backend. Args: - backend_type: ``"mujoco"`` or ``"motrix"``. + backend_type: ``"mujoco"``, ``"motrix"``, or ``"drake"``. scene: SceneCfg for either static or composed scenes. num_envs: Number of environments. sim_dt: Simulation timestep. @@ -69,6 +81,8 @@ def create_backend( position_actuator_gains = kwargs.pop("position_actuator_gains", None) motrix_max_iterations = kwargs.pop("motrix_max_iterations", None) post_step_forward_sensor = kwargs.pop("post_step_forward_sensor", None) + drake_backend_mode = kwargs.pop("drake_backend_mode", "batch") + drake_nthread = kwargs.pop("drake_nthread", None) if backend_type == "mujoco": MuJoCoBackend = _load_mujoco_backend() if position_actuator_gains is not None: @@ -83,6 +97,18 @@ def create_backend( if motrix_max_iterations is not None: kwargs["max_iterations"] = motrix_max_iterations return cast(SimBackend, MotrixBackend(scene, num_envs, sim_dt, **kwargs)) + if backend_type == "drake": + DrakeBackend = _load_drake_backend() + # DrakeUni is a generic batch engine. Task-level body names and scalar + # gain overrides are consumed by other backends, but Drake reads bodies, + # actuators, and sensors from the model contract itself. 
def __getattr__(name: str):
    """Resolve this package's public names lazily (module-level ``__getattr__``).

    The backend and playback submodules are imported only when one of their
    exported names is first requested, so importing the package stays cheap.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name in {"DRAKE_AVAILABLE", "DRAKE_BATCH_AVAILABLE", "DrakeBackend"}:
        from .backend import (
            DRAKE_AVAILABLE,
            DRAKE_BATCH_AVAILABLE,
            DrakeBackend,
        )

        values = {
            "DRAKE_AVAILABLE": DRAKE_AVAILABLE,
            "DRAKE_BATCH_AVAILABLE": DRAKE_BATCH_AVAILABLE,
            "DrakeBackend": DrakeBackend,
        }
        return values[name]
    if name == "run_drake_playback":
        from .playback import run_drake_playback

        return run_drake_playback
    # Match the interpreter's own wording so tracebacks name the module too.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
def _module_available(name: str) -> bool:
    """Return ``True`` when ``find_spec`` locates ``name``.

    ``ImportError``, ``AttributeError`` and ``ValueError`` raised by the lookup
    are treated as "not available".
    """
    try:
        spec = find_spec(name)
    except (ImportError, AttributeError, ValueError):
        return False
    return spec is not None
def _load_drakeuni_symbols() -> None:
    """Load DrakeUni only when a Drake backend is actually constructed.

    On success the module-level ``DrakeBatchConfig`` and
    ``create_drake_runtime`` symbols are bound and the availability flags are
    set to ``True``; later calls return immediately. On failure the flags are
    set to ``False`` and both ``DRAKE_IMPORT_ERROR`` and
    ``DRAKE_BATCH_IMPORT_ERROR`` record the cause, so each flag always has a
    matching error.

    Raises:
        ImportError: If ``drakeuni.runtime`` cannot be imported, or its
            diagnostics report that the batch extension is unavailable.
    """

    global DRAKE_AVAILABLE
    global DRAKE_IMPORT_ERROR
    global DRAKE_BATCH_AVAILABLE
    global DRAKE_BATCH_IMPORT_ERROR
    global DrakeBatchConfig
    global create_drake_runtime
    global _DRAKEUNI_SYMBOLS_LOADED

    if _DRAKEUNI_SYMBOLS_LOADED:
        return
    try:
        from drakeuni.runtime import DrakeBatchConfig as ImportedDrakeBatchConfig
        from drakeuni.runtime import batch_diagnostics
        from drakeuni.runtime import create_runtime as imported_create_runtime
    except ImportError as exc:  # pragma: no cover - optional local package.
        DRAKE_AVAILABLE = False
        DRAKE_IMPORT_ERROR = exc
        DRAKE_BATCH_AVAILABLE = False
        DRAKE_BATCH_IMPORT_ERROR = exc
        raise ImportError("DrakeUni batch runtime is not installed.") from exc

    diagnostics = batch_diagnostics()
    if not diagnostics.batch_available:
        detail = diagnostics.batch_import_error
        import_error = ImportError(detail or "DrakeEnvPool batch extension has not been built.")
        DRAKE_AVAILABLE = False
        DRAKE_IMPORT_ERROR = import_error
        DRAKE_BATCH_AVAILABLE = False
        DRAKE_BATCH_IMPORT_ERROR = import_error
        raise ImportError("DrakeEnvPool batch extension has not been built.") from import_error

    DrakeBatchConfig = ImportedDrakeBatchConfig
    create_drake_runtime = imported_create_runtime
    DRAKE_AVAILABLE = True
    DRAKE_IMPORT_ERROR = None
    DRAKE_BATCH_AVAILABLE = True
    DRAKE_BATCH_IMPORT_ERROR = None
    _DRAKEUNI_SYMBOLS_LOADED = True


def ensure_drake_batch_available() -> tuple[bool, ImportError | None]:
    """Report whether the DrakeUni batch extension can be used.

    Returns:
        ``(True, None)`` when loading succeeds, otherwise ``(False, error)``
        where ``error`` is the ``ImportError`` raised by the loader.
    """

    try:
        _load_drakeuni_symbols()
    except ImportError as exc:
        return False, exc
    return True, None
+ROOT_QPOS_DIM = 7 +ROOT_QVEL_DIM = 6 + + +# Small helper types. +@dataclass(frozen=True) +class _DrakeUniModelView: + """Read-only model-shape facade for UniLab's ``backend.model`` API. + + DrakeUni exposes model dimensions through ``model_info``. UniLab's backend + contract expects a model-like object with dimension methods, so this facade + carries those shape queries without exposing Drake internals. + """ + + nq: int + nv: int + nu: int + + def num_positions(self) -> int: + return self.nq + + def num_velocities(self) -> int: + return self.nv + + def num_actuators(self) -> int: + return self.nu + + +# Path and thread helpers. +def _resolve_batch_nthread(num_envs: int, requested: int) -> int: + """Resolve a worker count without creating idle workers above num_envs.""" + + env_count = max(1, int(num_envs)) + requested_count = int(requested) + if requested_count > 0: + return min(env_count, requested_count) + return min(env_count, max(1, cpu_count() * 2)) + + +def _resolve_scene_path(scene: SceneCfg) -> Path: + """Convert UniLab's scene pointer into an absolute model path.""" + + if not scene.model_file: + raise ValueError("DrakeBackend requires SceneCfg.model_file") + path = Path(scene.model_file) + return path if path.is_absolute() else Path.cwd() / path + + +class DrakeBackend(SimBackend): + """UniLab ``SimBackend`` implementation backed by DrakeUni batch runtime. + + The backend keeps the public UniLab API stable while delegating model + construction, integration, and raw sensor evaluation to DrakeUni. + """ + + backend_type = "drake" + + def __init__( + self, + scene: SceneCfg, + num_envs: int, + sim_dt: float, + *, + drake_backend_mode: str = "batch", + nthread: int = 0, + ) -> None: + # Validate the backend mode at construction so Hydra/config mistakes + # fail at the backend boundary. + mode = str(drake_backend_mode or "batch").strip().lower() + if mode != "batch": + raise ValueError( + "UniLab DrakeBackend requires drake_backend_mode='batch'. 
" + f"Got {drake_backend_mode!r}." + ) + if _pydrake_loaded(): + raise ImportError( + "Drake batch backend cannot be loaded after pydrake has already " + "been imported in this process. Start a fresh process before " + "constructing DrakeBackend." + ) + if int(num_envs) < 1: + raise ValueError(f"DrakeUni batch backend requires num_envs >= 1, got {num_envs}") + _load_drakeuni_symbols() + if DrakeBatchConfig is None or create_drake_runtime is None: + detail = DRAKE_BATCH_IMPORT_ERROR + message = "DrakeUni runtime is not available." + if detail is not None: + message = f"{message} Import error: {detail}" + raise ImportError(message) from detail + + self._pre_step_control_fn = None + self._scene_cleanup_handle = None + self._num_envs = int(num_envs) + self._sim_dt = float(sim_dt) + self._scene_model_file = str(_resolve_scene_path(scene)) + # DrakeUni receives only generic batch facts. Task concepts such as + # base bodies, push targets, and observation semantics stay in UniLab. + config = DrakeBatchConfig( + model_file=self._scene_model_file, + num_envs=self._num_envs, + sim_dt=self._sim_dt, + nthread=int(nthread), + ) + self._runtime = create_drake_runtime(config) + model_info = self._runtime.model_info() + # Cache static model metadata once and expose copies through the + # UniLab backend contract. 
+ self._home_qpos_mujoco = model_info.home_qpos.copy() + self._home_qvel_mujoco = model_info.home_qvel.copy() + self._ctrl_limits = model_info.ctrl_limits.copy() + self._joint_ranges = model_info.joint_ranges.copy() + self._actuator_stiffness = model_info.actuator_stiffness.copy() + self._actuator_damping = model_info.actuator_damping.copy() + self._actuator_qpos_adr = model_info.actuator_qpos_adr.astype(np.intp, copy=True) + self._actuator_qvel_adr = model_info.actuator_qvel_adr.astype(np.intp, copy=True) + self._sensor_names = tuple(model_info.sensor_names) + self._sensor_adr = model_info.sensor_adr.copy() + self._sensor_dim = model_info.sensor_dim.copy() + self._site_name_to_id = { + str(name): index for index, name in enumerate(getattr(model_info, "site_names", ())) + } + self._joint_qpos_adr_by_name = { + str(name): int(adr) + for name, adr in zip( + getattr(model_info, "joint_names", ()), + getattr(model_info, "joint_qpos_adr", ()), + strict=True, + ) + } + self._joint_qvel_adr_by_name = { + str(name): int(adr) + for name, adr in zip( + getattr(model_info, "joint_names", ()), + getattr(model_info, "joint_qvel_adr", ()), + strict=True, + ) + } + self._joint_dims_by_name = { + str(name): (int(qpos_dim), int(qvel_dim)) + for name, qpos_dim, qvel_dim in zip( + getattr(model_info, "joint_names", ()), + getattr(model_info, "joint_qpos_dim", ()), + getattr(model_info, "joint_qvel_dim", ()), + strict=True, + ) + } + self._root_qpos_dim = ( + int(np.min(self._actuator_qpos_adr)) if self._actuator_qpos_adr.size else 0 + ) + self._root_qvel_dim = ( + int(np.min(self._actuator_qvel_adr)) if self._actuator_qvel_adr.size else 0 + ) + self._num_bodies = int(model_info.num_bodies) + self._pending_body_forces = np.zeros( + (self._num_envs, self._num_bodies, 3), dtype=np.float64 + ) + self._model = _DrakeUniModelView( + nq=int(model_info.nq), + nv=int(model_info.nv), + nu=int(model_info.nu), + ) + self._nthread = int(getattr(self._runtime, "nthread", int(nthread))) + # Runtime 
state and raw sensor views are refreshed after reset/step. + self._physics_state = self._runtime.physics_state() + self._sensor_data = np.zeros( + (self._num_envs, int(model_info.nsensordata)), + dtype=np.float64, + ) + self._sensor_views: dict[str, np.ndarray] = {} + self._sync_runtime_state() + + # Static model contract. + # + # These accessors expose stable dimensions, limits, and reset defaults from + # the cached model metadata. + @property + def scene_model_file(self) -> str: + return self._scene_model_file + + @property + def num_envs(self) -> int: + return self._num_envs + + @property + def nthread(self) -> int: + return self._nthread + + @property + def model(self) -> _DrakeUniModelView: + return self._model + + @property + def num_actuators(self) -> int: + return self._model.nu + + @property + def num_dof_vel(self) -> int: + return int(self._actuator_qvel_adr.size) + + # Return copies for arrays that UniLab may clamp, concatenate, or normalize. + # The backend cache should not be mutated by task-side code. + def get_actuator_ctrl_range(self) -> np.ndarray: + return self._ctrl_limits.copy() + + def get_joint_range(self) -> np.ndarray | None: + return self._joint_ranges.copy() + + def get_keyframe_qpos(self, name: str) -> np.ndarray: + if name == "home": + return self._home_qpos_mujoco.copy() + return self._runtime.keyframe_qpos(str(name)) + + def get_default_qpos(self) -> np.ndarray: + return self._home_qpos_mujoco.copy() + + def get_init_qvel(self) -> np.ndarray: + return self._home_qvel_mujoco.copy() + + def get_actuator_gains(self) -> tuple[np.ndarray, np.ndarray]: + return (self._actuator_stiffness.copy(), self._actuator_damping.copy()) + + def get_body_ids(self, names: Sequence[str]) -> np.ndarray: + # Body IDs are owned by DrakeUni because they depend on the materialized + # Drake model, not on UniLab's scene pointer. 
+ return self._runtime.body_ids(tuple(str(name) for name in names)) + + def get_motion_body_ids(self, names: Sequence[str]) -> np.ndarray: + return self.get_body_ids(names) + + def get_site_ids(self, names: Sequence[str]) -> np.ndarray: + ids: list[int] = [] + for name in names: + key = str(name) + try: + ids.append(self._site_name_to_id[key]) + except KeyError as exc: + raise ValueError(f"Drake model does not contain MJCF site {key!r}") from exc + return np.asarray(ids, dtype=np.int32) + + def get_joint_dof_indices(self, names: Sequence[str]) -> np.ndarray: + indices: list[int] = [] + for name in names: + key = str(name) + self._require_single_dof_joint(key) + try: + indices.append(self._joint_qvel_adr_by_name[key]) + except KeyError as exc: + raise ValueError(f"Drake model does not contain joint {key!r}") from exc + return np.asarray(indices, dtype=np.int32) + + def get_joint_dof_pos_indices(self, names: Sequence[str]) -> np.ndarray: + indices: list[int] = [] + for name in names: + key = str(name) + self._require_single_dof_joint(key) + try: + indices.append(self._joint_qpos_adr_by_name[key] - self._root_qpos_dim) + except KeyError as exc: + raise ValueError(f"Drake model does not contain joint {key!r}") from exc + return np.asarray(indices, dtype=np.int32) + + def get_joint_dof_vel_indices(self, names: Sequence[str]) -> np.ndarray: + indices: list[int] = [] + for name in names: + key = str(name) + self._require_single_dof_joint(key) + try: + indices.append(self._joint_qvel_adr_by_name[key] - self._root_qvel_dim) + except KeyError as exc: + raise ValueError(f"Drake model does not contain joint {key!r}") from exc + return np.asarray(indices, dtype=np.int32) + + # Stepping and reset. + def step(self, ctrl: np.ndarray, nsteps: int = 1) -> dict | None: + # UniLab passes one actuator command per env. An optional pre-step hook + # can convert policy actions into backend-native position targets. 
+ step_count = int(nsteps) + if step_count < 1: + raise ValueError(f"nsteps must be >= 1, got {nsteps}") + values = np.asarray(ctrl, dtype=np.float64) + if values.shape != (self._num_envs, self.num_actuators): + raise ValueError( + "DrakeUni batch backend step expected ctrl shape " + f"({self._num_envs}, {self.num_actuators}), got {values.shape}" + ) + start = time.perf_counter() + try: + if self._pre_step_control_fn is None: + output = self._runtime.step(values, step_count, self._pending_body_forces_or_none()) + self._sync_runtime_state(output) + else: + output = None + for _ in range(step_count): + native_ctrl = self._apply_pre_step_control(values) + output = self._runtime.step(native_ctrl, 1, self._pending_body_forces_or_none()) + self._sync_runtime_state(output) + finally: + self._pending_body_forces.fill(0.0) + timing = dict(output.get("timing", {})) + timing.setdefault("step_ms", (time.perf_counter() - start) * 1000.0) + return {"timing": timing} + + def set_state( + self, + env_indices: np.ndarray, + qpos: np.ndarray, + qvel: np.ndarray, + randomization: ResetRandomizationPayload | None = None, + ) -> None: + # Reset is the handoff from UniLab's sampled state tensors into + # DrakeUni's per-env runtime contexts. 
+ if randomization is not None and not randomization.is_empty(): + raise NotImplementedError( + "DrakeUni batch backend does not apply reset randomization yet" + ) + indices = np.asarray(env_indices, dtype=np.int32) + qpos_rows = np.asarray(qpos, dtype=np.float64) + qvel_rows = np.asarray(qvel, dtype=np.float64) + if indices.ndim != 1: + raise ValueError(f"env_indices must be one-dimensional, got {indices.shape}") + if np.any(indices < 0) or np.any(indices >= self._num_envs): + raise IndexError( + f"env_indices must be in [0, {self._num_envs - 1}], got {indices.tolist()}" + ) + if qpos_rows.shape != (indices.size, self._model.nq): + raise ValueError(f"qpos must have shape ({indices.size}, {self._model.nq})") + if qvel_rows.shape != (indices.size, self._model.nv): + raise ValueError(f"qvel must have shape ({indices.size}, {self._model.nv})") + output = self._runtime.reset(indices, qpos_rows, qvel_rows) + self._sync_runtime_state(output) + + # Playback and domain randomization. + def get_dr_capabilities(self) -> DomainRandomizationCapabilities: + # Unsupported randomization knobs fail explicitly instead of silently + # becoming no-ops. 
+ return DomainRandomizationCapabilities(supports_interval_body_force=True) + + def apply_interval_randomization(self, plan: IntervalRandomizationPlan) -> None: + if plan.is_empty(): + return + self._pending_body_forces.fill(0.0) + if plan.push_perturbation_limit is not None: + raise NotImplementedError( + "DrakeBackend interval pushes require explicit body_ids and body_force" + ) + if plan.body_force is not None: + if plan.body_ids is None: + raise ValueError("Interval body-force perturbation requires body_ids") + self.apply_body_force(plan.body_ids, plan.body_force) + if plan.body_linear_velocity_delta is not None: + raise NotImplementedError( + "DrakeUni batch backend does not support interval body velocity perturbation" + ) + + def get_play_capabilities(self) -> BackendPlayCapabilities: + # Drake advances playback physics, while the shared playback helper + # handles recording. There is no native interactive Drake viewer path. + return BackendPlayCapabilities( + supports_native_interactive_renderer=False, + supports_physics_state_playback=True, + supports_native_video_capture=False, + ) + + def resolve_play_render_plan( + self, + *, + play_render_mode: str | None, + play_steps: int | None, + output_video: str | PathLike[str] | None, + ) -> BackendPlayRenderPlan: + mode = normalize_play_render_mode(play_render_mode) + if mode in {"none", "auto"}: + return BackendPlayRenderPlan( + mode=mode, + headless=True, + record_video=False, + num_steps=play_steps, + output_video=None, + ) + if mode == "interactive": + raise NotImplementedError( + "DrakeUni batch backend does not support interactive rendering" + ) + if play_steps is None: + raise ValueError("DrakeUni record playback requires a finite play_steps value.") + if output_video is None: + raise ValueError("DrakeUni record playback requires an output video path.") + return BackendPlayRenderPlan( + mode="record", + headless=True, + record_video=True, + num_steps=int(play_steps), + output_video=output_video, + ) + + 
def run_playback(
    self,
    *,
    env: Any,
    initialize: Callable[[], Any],
    step: Callable[[Any], Any],
    num_steps: int | None,
    output_video: str | PathLike[str] | None = None,
    render_spacing: float | None = None,
    render_offset_mode: str | None = None,
    headless: bool | None = None,
    record_video: bool | None = None,
    frame_state_getter: Callable[[], np.ndarray] | None = None,
    camera_kwargs: dict[str, Any] | None = None,
    extra_data_getter: Callable[[], np.ndarray | None] | None = None,
) -> str | None:
    """Run a playback rollout by delegating to ``run_drake_playback``.

    All arguments are forwarded by keyword. ``headless`` and ``record_video``
    accept ``None`` for "unspecified" and are resolved to booleans here:

    * ``record_video=None`` resolves to ``False``.
    * ``headless=None`` resolves to ``True``. ``run_drake_playback`` raises
      ``NotImplementedError`` for non-headless, non-recording playback, so
      treating an unspecified value as ``False`` would make a default call
      fail with an "interactive rendering" error nobody asked for.

    Returns:
        Whatever ``run_drake_playback`` returns.
    """
    # Playback keeps Drake as the physics backend. The helper owns rendering
    # and video capture so training code can use one playback contract.
    resolved_headless = True if headless is None else bool(headless)
    return run_drake_playback(
        env=env,
        initialize=initialize,
        step=step,
        num_steps=num_steps,
        output_video=output_video,
        render_spacing=render_spacing,
        render_offset_mode=render_offset_mode,
        headless=resolved_headless,
        record_video=bool(record_video),
        frame_state_getter=frame_state_getter,
        camera_kwargs=camera_kwargs,
        extra_data_getter=extra_data_getter,
    )


def init_renderer(
    self,
    spacing: float = 1.0,
    *,
    offset_mode: str = "grid",
    headless: bool = False,
    capture: bool = False,
    width: int = 1280,
    height: int = 720,
    camera_kwargs: dict[str, Any] | None = None,
) -> None:
    """Reject renderer initialization; every argument is ignored.

    Raises:
        NotImplementedError: Always.
    """
    del spacing, offset_mode, headless, capture, width, height, camera_kwargs
    raise NotImplementedError("DrakeUni batch backend records through run_playback")


def render(self) -> None:
    """Reject interactive rendering.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("DrakeUni batch backend does not support interactive rendering")


def capture_video_frame(self) -> np.ndarray:
    """Reject direct frame capture.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("DrakeUni batch backend records through run_playback")


# Runtime state getters.
#
# ``physics_state`` is DrakeUni's compact per-env packet used by playback
# and debugging. Sensor-specific getters below expose named slices/packets.
def get_physics_state(self) -> np.ndarray:
    """Return a copy of the cached physics-state array."""
    return self._physics_state.copy()


def get_playback_model(self, env_index: int | None = None) -> str:
    """Return the scene model file used for playback.

    The same model file is returned for every environment; ``env_index`` is
    only range-checked when it is provided.

    Raises:
        IndexError: If ``env_index`` is outside ``[0, num_envs - 1]``.
    """
    if env_index is not None:
        idx = int(env_index)
        if idx < 0 or idx >= self._num_envs:
            raise IndexError(f"env_index must be in [0, {self._num_envs - 1}], got {idx}")
    return self._scene_model_file


def diagnostics(self) -> Any:
    """Return the runtime's diagnostics object unchanged."""
    return self._runtime.diagnostics()


def apply_body_force(
    self,
    body_ids: np.ndarray,
    force: np.ndarray,
) -> None:
    """Accumulate per-body forces into the pending body-force buffer.

    Args:
        body_ids: Body indices; flattened to one dimension. A repeated id
            accumulates each of its force columns.
        force: Array of shape ``(num_envs, len(body_ids), 3)``.

    Raises:
        ValueError: If ``force`` does not have the expected shape.
        IndexError: If any body id is outside ``[0, num_bodies - 1]``.
    """
    ids = np.asarray(body_ids, dtype=np.int32).reshape(-1)
    values = np.asarray(force, dtype=np.float64)
    expected_shape = (self._num_envs, ids.size, 3)
    if values.shape != expected_shape:
        raise ValueError(f"body force must have shape {expected_shape}, got {values.shape}")
    # Validate every id before touching the accumulator so a bad id cannot
    # leave forces from the earlier, valid ids half-applied.
    invalid = ids[(ids < 0) | (ids >= self._num_bodies)]
    if invalid.size:
        raise IndexError(
            f"body id {int(invalid[0])} is outside [0, {self._num_bodies - 1}]"
        )
    for offset, body_id in enumerate(ids):
        self._pending_body_forces[:, int(body_id), :] += values[:, offset, :]


# Sensor access.
#
# DrakeUni returns one flat sensor array; this class owns the MuJoCo-style
# named views over that array.
+ def get_base_pos(self) -> np.ndarray: + self._require_floating_root() + return self._physics_state[:, 1:4].copy() + + def get_base_quat(self) -> np.ndarray: + self._require_floating_root() + return self._physics_state[:, 4:8].copy() + + def get_base_lin_vel(self) -> np.ndarray: + qvel_start = 1 + self._model.nq + return self._physics_state[:, qvel_start : qvel_start + 3].copy() + + def get_base_ang_vel(self) -> np.ndarray: + qvel_start = 1 + self._model.nq + return self._physics_state[:, qvel_start + 3 : qvel_start + 6].copy() + + def get_dof_pos(self) -> np.ndarray: + return self._physics_state[:, 1 + self._actuator_qpos_adr].copy() + + def get_dof_vel(self) -> np.ndarray: + qvel_start = 1 + self._model.nq + return self._physics_state[:, qvel_start + self._actuator_qvel_adr].copy() + + def get_body_pos_w(self, body_ids: np.ndarray) -> np.ndarray: + return self._body_state(body_ids)["pos"] + + def get_body_quat_w(self, body_ids: np.ndarray) -> np.ndarray: + return self._body_state(body_ids)["quat"] + + def get_body_lin_vel_w(self, body_ids: np.ndarray) -> np.ndarray: + return self._body_state(body_ids)["linvel"] + + def get_body_ang_vel_w(self, body_ids: np.ndarray) -> np.ndarray: + return self._body_state(body_ids)["angvel"] + + def get_body_pos_b(self, body_ids: np.ndarray) -> np.ndarray: + body_state = self._body_state(body_ids) + base_pos = self.get_base_pos() + base_rot = _quat_to_rotation_matrix(self.get_base_quat()) + delta = body_state["pos"] - base_pos[:, None, :] + return np.einsum("nij,nkj->nki", np.swapaxes(base_rot, 1, 2), delta) + + def get_body_quat_b(self, body_ids: np.ndarray) -> np.ndarray: + body_quat = self._body_state(body_ids)["quat"] + base_inv = _quat_conjugate(self.get_base_quat()) + return _quat_multiply(base_inv[:, None, :], body_quat) + + def get_body_lin_vel_b(self, body_ids: np.ndarray) -> np.ndarray: + base_rot = _quat_to_rotation_matrix(self.get_base_quat()) + return np.einsum( + "nij,nkj->nki", + np.swapaxes(base_rot, 1, 2), + 
self._body_state(body_ids)["linvel"], + ) + + def get_body_ang_vel_b(self, body_ids: np.ndarray) -> np.ndarray: + base_rot = _quat_to_rotation_matrix(self.get_base_quat()) + return np.einsum( + "nij,nkj->nki", + np.swapaxes(base_rot, 1, 2), + self._body_state(body_ids)["angvel"], + ) + + def get_sensor_data(self, name: str) -> np.ndarray: + if name in self._sensor_views: + return self._sensor_views[name].copy() + raise KeyError(f"Unknown DrakeUni sensor: {name}") + + # Internal helpers. + def _sync_runtime_state(self, output: dict[str, Any] | None = None) -> None: + # Keep UniLab's cached state/sensor views aligned after every DrakeUni update. + if output is None: + self._physics_state = self._runtime.physics_state() + sensor_data = self._runtime.sensor_data() + elif "env_ids" in output: + indices = np.asarray(output["env_ids"], dtype=np.int32) + self._physics_state[indices] = np.asarray(output["state"], dtype=np.float64) + self._sensor_data[indices] = np.asarray(output["sensor_data"], dtype=np.float64) + self._rebuild_sensor_views() + return + else: + self._physics_state = np.asarray(output["state"], dtype=np.float64).copy() + sensor_data = output["sensor_data"] + self._sensor_data = np.asarray(sensor_data, dtype=np.float64).copy() + self._rebuild_sensor_views() + + def _rebuild_sensor_views(self) -> None: + self._sensor_views = {} + for index, name in enumerate(self._sensor_names): + adr = int(self._sensor_adr[index]) + dim = int(self._sensor_dim[index]) + self._sensor_views[name] = self._sensor_data[:, adr : adr + dim] + + def _body_state(self, body_ids: np.ndarray) -> dict[str, np.ndarray]: + ids = np.asarray(body_ids, dtype=np.int32) + if ids.ndim != 1: + raise ValueError(f"body_ids must be one-dimensional, got {ids.shape}") + return cast(dict[str, np.ndarray], self._runtime.compute_body_state(ids)) + + def _pending_body_forces_or_none(self) -> np.ndarray | None: + if np.any(self._pending_body_forces): + return self._pending_body_forces + return None + + def 
_require_single_dof_joint(self, name: str) -> None: + dims = self._joint_dims_by_name.get(name) + if dims is None: + raise ValueError(f"Drake model does not contain joint {name!r}") + if dims != (1, 1): + raise ValueError(f"Drake joint {name!r} is not a single-DoF joint") + + def _require_floating_root(self) -> None: + if self._model.nq < ROOT_QPOS_DIM or self._model.nv < ROOT_QVEL_DIM: + raise NotImplementedError( + "DrakeBackend root-state helpers require a floating-root compact state" + ) + + +def _quat_conjugate(quat: np.ndarray) -> np.ndarray: + values = np.asarray(quat, dtype=np.float64).copy() + values[..., 1:] *= -1.0 + return values + + +def _quat_multiply(lhs: np.ndarray, rhs: np.ndarray) -> np.ndarray: + a = np.asarray(lhs, dtype=np.float64) + b = np.asarray(rhs, dtype=np.float64) + aw, ax, ay, az = np.moveaxis(a, -1, 0) + bw, bx, by, bz = np.moveaxis(b, -1, 0) + return np.stack( + ( + aw * bw - ax * bx - ay * by - az * bz, + aw * bx + ax * bw + ay * bz - az * by, + aw * by - ax * bz + ay * bw + az * bx, + aw * bz + ax * by - ay * bx + az * bw, + ), + axis=-1, + ) + + +def _quat_to_rotation_matrix(quat: np.ndarray) -> np.ndarray: + values = np.asarray(quat, dtype=np.float64) + norm = np.linalg.norm(values, axis=-1, keepdims=True) + q = np.divide(values, np.maximum(norm, 1.0e-12)) + w, x, y, z = np.moveaxis(q, -1, 0) + matrix = np.empty((*q.shape[:-1], 3, 3), dtype=np.float64) + matrix[..., 0, 0] = 1.0 - 2.0 * (y * y + z * z) + matrix[..., 0, 1] = 2.0 * (x * y - z * w) + matrix[..., 0, 2] = 2.0 * (x * z + y * w) + matrix[..., 1, 0] = 2.0 * (x * y + z * w) + matrix[..., 1, 1] = 1.0 - 2.0 * (x * x + z * z) + matrix[..., 1, 2] = 2.0 * (y * z - x * w) + matrix[..., 2, 0] = 2.0 * (x * z - y * w) + matrix[..., 2, 1] = 2.0 * (y * z + x * w) + matrix[..., 2, 2] = 1.0 - 2.0 * (x * x + y * y) + return matrix + + +__all__ = [ + "DRAKE_AVAILABLE", + "DRAKE_IMPORT_ERROR", + "DRAKE_BATCH_AVAILABLE", + "DRAKE_BATCH_IMPORT_ERROR", + "DrakeBackend", + 
"_resolve_batch_nthread", + "ensure_drake_batch_available", +] diff --git a/src/unilab/base/backend/drake/playback.py b/src/unilab/base/backend/drake/playback.py new file mode 100644 index 000000000..ab8c358b7 --- /dev/null +++ b/src/unilab/base/backend/drake/playback.py @@ -0,0 +1,62 @@ +"""Drake-owned playback execution helpers.""" + +from __future__ import annotations + +from os import PathLike +from typing import Any, Callable, TypeVar + +import numpy as np + +from unilab.base.backend.mujoco.playback import run_mujoco_playback + +ObsT = TypeVar("ObsT") + + +def run_drake_playback( + *, + env: Any, + initialize: Callable[[], ObsT], + step: Callable[[ObsT], ObsT], + num_steps: int | None, + output_video: str | PathLike[str] | None, + render_spacing: float | None, + render_offset_mode: str | None, + headless: bool, + record_video: bool, + frame_state_getter: Callable[[], np.ndarray] | None, + camera_kwargs: dict[str, Any] | None, + extra_data_getter: Callable[[], np.ndarray | None] | None = None, +) -> str | None: + """Run Drake physics playback and optionally render it with MuJoCo. + + Drake owns the rollout: ``step`` must advance the Drake backend and + ``frame_state_getter`` must read Drake state snapshots. MuJoCo is used only + as an offline visual renderer for the recorded state sequence. 
+ """ + del render_offset_mode + if record_video: + return run_mujoco_playback( + env=env, + initialize=initialize, + step=step, + num_steps=num_steps, + output_video=output_video, + render_spacing=render_spacing, + headless=True, + record_video=True, + frame_state_getter=frame_state_getter, + camera_kwargs=camera_kwargs, + extra_data_getter=extra_data_getter, + ) + if not headless: + raise NotImplementedError("Drake playback does not support interactive rendering yet.") + + obs = initialize() + steps_run = 0 + while num_steps is None or steps_run < int(num_steps): + obs = step(obs) + steps_run += 1 + return None + + +__all__ = ["run_drake_playback"] diff --git a/src/unilab/base/base.py b/src/unilab/base/base.py index 735b11d91..9510ce249 100644 --- a/src/unilab/base/base.py +++ b/src/unilab/base/base.py @@ -34,6 +34,8 @@ class EnvCfg: ctrl_dt: float = 0.01 render_spacing: float = 1.0 render_offset_mode: str = "grid" + drake_backend_mode: str = "batch" + drake_nthread: int = 0 motrix_max_iterations: Optional[int] = None post_step_forward_sensor: bool = False diff --git a/src/unilab/base/registry.py b/src/unilab/base/registry.py index dbe9ff4cc..deec7da69 100644 --- a/src/unilab/base/registry.py +++ b/src/unilab/base/registry.py @@ -20,6 +20,7 @@ from .base import ABEnv, EnvCfg TEnvCfg = TypeVar("TEnvCfg", bound=EnvCfg) +_SUPPORTED_SIM_BACKENDS = ("mujoco", "motrix", "drake") _DEFAULT_SIM_BACKEND_ORDER: tuple[str, ...] = ("mujoco", "motrix") _REGISTRY_MODULES_ATTR = "__unilab_registry_modules__" _DEFAULT_REGISTRY_PACKAGES = ( @@ -87,9 +88,10 @@ def decorator(cls: Type[TEnvCfg]) -> Type[TEnvCfg]: def register_env(name: str, env_cls: Type[ABEnv], sim_backend: str): """Register an environment class with a name and simulation backend.""" - if sim_backend not in ["mujoco", "motrix"]: + if sim_backend not in _SUPPORTED_SIM_BACKENDS: raise ValueError( - f"Unsupported simulation backend: {sim_backend}. Only 'mujoco' and 'motrix' are supported." 
+ f"Unsupported simulation backend: {sim_backend}. " + f"Supported backends: {', '.join(_SUPPORTED_SIM_BACKENDS)}." ) if name not in _envs: @@ -208,7 +210,7 @@ def make( Args: name: Environment name - sim_backend: Simulation backend ("mujoco" or "motrix"). If None, uses the + sim_backend: Simulation backend. If None, uses the explicit default backend order: "mujoco", then "motrix". num_envs: Number of environments to create diff --git a/src/unilab/envs/locomotion/go1/joystick.py b/src/unilab/envs/locomotion/go1/joystick.py index ed049cdb1..a1196d80e 100644 --- a/src/unilab/envs/locomotion/go1/joystick.py +++ b/src/unilab/envs/locomotion/go1/joystick.py @@ -86,6 +86,7 @@ def _compute_reset_obs( @registry.env("Go1JoystickFlat", sim_backend="mujoco") @registry.env("Go1JoystickFlat", sim_backend="motrix") +@registry.env("Go1JoystickFlat", sim_backend="drake") class Go1WalkTask(Go1BaseEnv): _cfg: Go1JoystickCfg @@ -103,6 +104,8 @@ def __init__(self, cfg: Go1JoystickCfg, num_envs=1, backend_type="mujoco"): base_name=cfg.asset.base_name, push_body_name=cfg.domain_rand.push_body_name, position_actuator_gains={"kp": cfg.control_config.Kp, "kd": cfg.control_config.Kd}, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, ) diff --git a/src/unilab/envs/locomotion/go2/footstand.py b/src/unilab/envs/locomotion/go2/footstand.py index 02cc7f256..a18e72676 100644 --- a/src/unilab/envs/locomotion/go2/footstand.py +++ b/src/unilab/envs/locomotion/go2/footstand.py @@ -139,6 +139,8 @@ def __init__(self, cfg: Go2HandStandCfg, num_envs=1, backend_type="mujoco"): position_actuator_gains={"kp": cfg.control_config.Kp, "kd": cfg.control_config.Kd}, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, ) super().__init__(cfg, backend, 
num_envs) self._enable_reward_log = True @@ -420,6 +422,7 @@ def _compute_reset_obs( @registry.env("Go2FootStand", sim_backend="mujoco") +@registry.env("Go2FootStand", sim_backend="drake") class Go2FootStandTask(Go2HandStandTask): _cfg: Go2FootStandCfg @@ -438,12 +441,37 @@ def __init__(self, cfg: Go2FootStandCfg, num_envs=1, backend_type="mujoco"): dtype=get_global_dtype(), ) self._critic_obs_history = np.zeros_like(self._obs_history) - self._base_geom_friction = self._backend.get_geom_friction() - self._floor_geom_id = self._backend.get_geom_id(self._cfg.asset.ground) - self._base_body_id = self._backend.get_body_id(self._cfg.asset.base_name) - self._base_body_mass = self._backend.get_body_mass() - self._base_body_ipos = self._backend.get_body_ipos() - self._base_dof_armature = self._backend.get_dof_armature() + domain_rand = self._cfg.domain_rand + needs_floor_friction = bool(domain_rand.randomize_floor_friction) + needs_body_mass = bool( + domain_rand.randomize_link_mass or domain_rand.torso_added_mass_range is not None + ) + needs_body_ipos = bool(domain_rand.randomize_torso_com) + needs_dof_armature = bool(domain_rand.randomize_dof_armature) + self._base_geom_friction = ( + self._backend.get_geom_friction() + if needs_floor_friction + else np.zeros((0, 3), dtype=np.float64) + ) + self._floor_geom_id = ( + self._backend.get_geom_id(self._cfg.asset.ground) if needs_floor_friction else -1 + ) + self._base_body_id = ( + self._backend.get_body_id(self._cfg.asset.base_name) + if needs_body_mass or needs_body_ipos + else -1 + ) + self._base_body_mass = ( + self._backend.get_body_mass() if needs_body_mass else np.zeros((0,), dtype=np.float64) + ) + self._base_body_ipos = ( + self._backend.get_body_ipos() if needs_body_ipos else np.zeros((0, 3), dtype=np.float64) + ) + self._base_dof_armature = ( + self._backend.get_dof_armature() + if needs_dof_armature + else np.zeros((0,), dtype=np.float64) + ) self._knee_body_ids = 
self._backend.get_body_ids(_FOOTSTAND_KNEE_BODY_NAMES) self._init_domain_randomization(Go2FootStandDomainRandomizationProvider()) diff --git a/src/unilab/envs/locomotion/go2/joystick.py b/src/unilab/envs/locomotion/go2/joystick.py index 1e664f96e..d16014fa5 100644 --- a/src/unilab/envs/locomotion/go2/joystick.py +++ b/src/unilab/envs/locomotion/go2/joystick.py @@ -94,6 +94,7 @@ def _compute_reset_obs( @registry.env("Go2JoystickFlat", sim_backend="mujoco") @registry.env("Go2JoystickFlat", sim_backend="motrix") +@registry.env("Go2JoystickFlat", sim_backend="drake") class Go2WalkTask(Go2BaseEnv): _cfg: Go2JoystickCfg @@ -113,6 +114,8 @@ def __init__(self, cfg: Go2JoystickCfg, num_envs=1, backend_type="mujoco"): base_name=cfg.asset.base_name, push_body_name=cfg.domain_rand.push_body_name, position_actuator_gains={"kp": cfg.control_config.Kp, "kd": cfg.control_config.Kd}, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, ) @@ -190,7 +193,16 @@ def update_state(self, state: NpEnvState) -> NpEnvState: dof_vel = self.get_dof_vel() self.feet_force[:, :, :] = 0 for i in range(len(self._cfg.sensor.feet_force)): - self.feet_force[:, i, :] = self._backend.get_sensor_data(self._cfg.sensor.feet_force[i]) + contact = self._backend.get_sensor_data(self._cfg.sensor.feet_force[i]) + if contact.shape[1] == 1: + self.feet_force[:, i, 2] = contact[:, 0] + elif contact.shape[1] == 3: + self.feet_force[:, i, :] = contact + else: + raise ValueError( + "foot contact sensor must return either scalar found flags " + f"or 3D force vectors, got {contact.shape}" + ) for i in range(len(self._cfg.sensor.feet_pos)): self.feet_pos[:, i, :] = self._backend.get_sensor_data(self._cfg.sensor.feet_pos[i]) terminated = gravity[:, 2] <= 0.5 diff --git a/src/unilab/envs/locomotion/go2_arm/manip_loco.py b/src/unilab/envs/locomotion/go2_arm/manip_loco.py index 
ded6f4457..e108ca662 100644 --- a/src/unilab/envs/locomotion/go2_arm/manip_loco.py +++ b/src/unilab/envs/locomotion/go2_arm/manip_loco.py @@ -279,6 +279,7 @@ def _compute_reset_obs( @registry.env("Go2ArmManipLoco", sim_backend="motrix") +@registry.env("Go2ArmManipLoco", sim_backend="drake") @registry.env("Go2ArmManipLoco", sim_backend="mujoco") class Go2ArmManipLocoEnv(Go2ArmBaseEnv): _cfg: Go2ArmManipLocoCfg @@ -286,9 +287,9 @@ class Go2ArmManipLocoEnv(Go2ArmBaseEnv): def __init__(self, cfg: Go2ArmManipLocoCfg, num_envs=1, backend_type="mujoco"): if cfg.reward_config is None: raise ValueError("reward_config must be provided via Hydra configuration") - if backend_type not in {"mujoco", "motrix"}: + if backend_type not in {"drake", "mujoco", "motrix"}: raise ValueError( - "Go2ArmManipLoco supports only the mujoco and motrix backends, " + "Go2ArmManipLoco supports only the drake, mujoco and motrix backends, " f"got {backend_type!r}" ) @@ -296,10 +297,12 @@ def __init__(self, cfg: Go2ArmManipLocoCfg, num_envs=1, backend_type="mujoco"): backend_kwargs: dict[str, Any] = { "base_name": cfg.asset.base_name, "push_body_name": cfg.domain_rand.push_body_name, + "drake_backend_mode": cfg.drake_backend_mode, + "drake_nthread": cfg.drake_nthread, } if backend_type == "motrix": backend_kwargs["motrix_max_iterations"] = cfg.iterations - else: + elif backend_type == "mujoco": backend_kwargs["position_actuator_gains"] = build_go2_arm_position_gains( cfg.control_config ) @@ -632,14 +635,6 @@ def apply_action(self, actions: np.ndarray, state: NpEnvState) -> np.ndarray: else effective_actions ) - ee_local_pos, ee_local_quat = self.get_ee_local_pose() - dq_ik = self.compute_arm_ik_delta( - self.curr_ee_goal_cart, - ee_local_pos, - self.ee_goal_orn_quat, - ee_local_quat, - ) - leg_ctrl = ( exec_actions[:, :12] * self._cfg.control_config.action_scale + self.default_angles[:12] ) @@ -649,6 +644,13 @@ def apply_action(self, actions: np.ndarray, state: NpEnvState) -> np.ndarray: copy=False, 
) else: + ee_local_pos, ee_local_quat = self.get_ee_local_pose() + dq_ik = self.compute_arm_ik_delta( + self.curr_ee_goal_cart, + ee_local_pos, + self.ee_goal_orn_quat, + ee_local_quat, + ) arm_ctrl = ( self.get_arm_dof_pos() + exec_actions[:, 12:18] * self._cfg.control_config.arm_action_scale diff --git a/src/unilab/envs/locomotion/go2w/joystick.py b/src/unilab/envs/locomotion/go2w/joystick.py index cd26f0b03..19af08ea2 100644 --- a/src/unilab/envs/locomotion/go2w/joystick.py +++ b/src/unilab/envs/locomotion/go2w/joystick.py @@ -227,6 +227,7 @@ def _sample_commands(self, env: Any, num_reset: int) -> np.ndarray: @registry.env("Go2WJoystickFlat", sim_backend="mujoco") +@registry.env("Go2WJoystickFlat", sim_backend="drake") class Go2WJoystickEnv(Go2WBaseEnv): _cfg: Go2WJoystickCfg @@ -240,6 +241,8 @@ def __init__(self, cfg: Go2WJoystickCfg, num_envs=1, backend_type="mujoco"): cfg.sim_dt, base_name=cfg.asset.base_name, push_body_name=cfg.domain_rand.push_body_name, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, ) diff --git a/src/unilab/envs/manipulation/allegro_inhand/rotation.py b/src/unilab/envs/manipulation/allegro_inhand/rotation.py index 7fef1a190..36f761b74 100644 --- a/src/unilab/envs/manipulation/allegro_inhand/rotation.py +++ b/src/unilab/envs/manipulation/allegro_inhand/rotation.py @@ -247,6 +247,7 @@ def build_reset_observation( # ─────────────────────────── Environment ────────────────────────────── +@registry.env("AllegroInhandRotation", sim_backend="drake") @registry.env("AllegroInhandRotation", sim_backend="mujoco") @registry.env("AllegroInhandRotation", sim_backend="motrix") class AllegroRotationPPO(AllegroBaseEnv): @@ -276,6 +277,8 @@ def __init__( }, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, + drake_backend_mode=cfg.drake_backend_mode, + 
drake_nthread=cfg.drake_nthread, ) super().__init__(cfg, backend, num_envs) self._enable_reward_log = True diff --git a/src/unilab/envs/manipulation/sharpa_inhand/rotation.py b/src/unilab/envs/manipulation/sharpa_inhand/rotation.py index 1245107d6..68b1d5a8b 100644 --- a/src/unilab/envs/manipulation/sharpa_inhand/rotation.py +++ b/src/unilab/envs/manipulation/sharpa_inhand/rotation.py @@ -456,6 +456,7 @@ def build_interval_randomization_plan( ) +@registry.env("SharpaInhandRotation", sim_backend="drake") @registry.env("SharpaInhandRotation", sim_backend="mujoco") @registry.env("SharpaInhandRotation", sim_backend="motrix") class SharpaInhandRotationEnv(SharpaInhandBaseEnv): @@ -487,6 +488,8 @@ def __init__( add_body_sensors=True, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, ) super().__init__(cfg, backend, num_envs) diff --git a/src/unilab/envs/manipulation/stewart/balance.py b/src/unilab/envs/manipulation/stewart/balance.py index f76fcc5f4..363bdb149 100644 --- a/src/unilab/envs/manipulation/stewart/balance.py +++ b/src/unilab/envs/manipulation/stewart/balance.py @@ -137,6 +137,7 @@ def _roll_pitch_from_quat(quat: np.ndarray) -> tuple[np.ndarray, np.ndarray]: # backend is the validated training path. 
@registry.env("StewartBalance", sim_backend="mujoco") @registry.env("StewartBalance", sim_backend="motrix") +@registry.env("StewartBalance", sim_backend="drake") class StewartBalanceEnv(NpEnv): _cfg: StewartBalanceCfg diff --git a/src/unilab/envs/motion_tracking/g1/tracking.py b/src/unilab/envs/motion_tracking/g1/tracking.py index fb013191a..f1379b7c5 100644 --- a/src/unilab/envs/motion_tracking/g1/tracking.py +++ b/src/unilab/envs/motion_tracking/g1/tracking.py @@ -443,6 +443,7 @@ def build_reset_observation( ) +@registry.env("G1MotionTracking", sim_backend="drake") @registry.env("G1MotionTracking", sim_backend="mujoco") @registry.env("G1MotionTracking", sim_backend="motrix") class G1MotionTrackingEnv(G1BaseEnv): @@ -464,6 +465,8 @@ def __init__(self, cfg: G1MotionTrackingCfg, num_envs=1, backend_type="mujoco"): add_body_sensors=True, motrix_max_iterations=cfg.motrix_max_iterations, post_step_forward_sensor=cfg.post_step_forward_sensor, + drake_backend_mode=cfg.drake_backend_mode, + drake_nthread=cfg.drake_nthread, ) super().__init__(cfg, backend, num_envs) diff --git a/src/unilab/training/run.py b/src/unilab/training/run.py index 013dc37d9..f07054e4c 100644 --- a/src/unilab/training/run.py +++ b/src/unilab/training/run.py @@ -28,8 +28,8 @@ def log_playback_plan(plan: BackendPlayRenderPlan, *, prefix: str = "") -> None: if plan.record_video: print(f"{prefix}Rendering video to {plan.output_video}...") elif plan.mode == "interactive": - print(f"{prefix}Starting interactive visualization (motrix native renderer)...") - print(f"{prefix}Close the render window to exit.") + print(f"{prefix}Starting interactive visualization...") + print(f"{prefix}Use the renderer window or browser URL reported by the backend.") else: print(f"{prefix}Running playback without video recording...") print(f"{prefix}Rendering playback frames...") diff --git a/tests/base/backend/test_drake_batch_pool.py b/tests/base/backend/test_drake_batch_pool.py new file mode 100644 index 
000000000..456a7ada0 --- /dev/null +++ b/tests/base/backend/test_drake_batch_pool.py @@ -0,0 +1,828 @@ +from __future__ import annotations + +import importlib.util +import json +import subprocess +import sys +import textwrap + +import pytest + + +def _module_available(name: str) -> bool: + try: + return importlib.util.find_spec(name) is not None + except ModuleNotFoundError: + return False + + +def _drakeuni_package_installed() -> bool: + return _module_available("drakeuni") + + +def _batch_extension_built() -> bool: + return _module_available("drakeuni.compiled._drake_env_pool") + + +def _run_clean_python(code: str) -> str: + result = subprocess.run( + [sys.executable, "-c", textwrap.dedent(code)], + check=True, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return result.stdout + + +_GO1_POOL_HELPER = """ +import xml.etree.ElementTree as ET + +import numpy as np + +from unilab.assets import ASSETS_ROOT_PATH +from drakeuni.batch_env import DrakeEnvPool +from drakeuni.runtime.mjcf_model_parser import ( + materialize_drake_compatible_mjcf, + parse_mjcf_model_contract, + sensor_frames_as_pool_inputs, +) + + +def make_go1_pool(nbatch, nthread): + source_model = ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml" + drake_model = materialize_drake_compatible_mjcf(source_model) + contract = parse_mjcf_model_contract(drake_model.model_file) + ( + sensor_frame_body_indices, + sensor_frame_offsets, + sensor_frame_ref_body_indices, + sensor_frame_ref_offsets, + ) = sensor_frames_as_pool_inputs(contract) + scene_root = ET.parse(source_model).getroot() + qpos = np.fromstring( + scene_root.find('.//key[@name="home"]').attrib["qpos"], + sep=" ", + dtype=np.float64, + ) + qvel = np.zeros(18, dtype=np.float64) + state = np.zeros((nbatch, 1 + qpos.size + qvel.size), dtype=np.float64) + state[:, 1 : 1 + qpos.size] = qpos + state[:, 1 + qpos.size :] = qvel + pool = DrakeEnvPool( + str(drake_model.model_file), + nbatch, + 0.01, + contract.ctrl_limits, + 
contract.torque_limits, + contract.actuator_kind, + contract.actuator_gear, + contract.actuator_stiffness, + contract.actuator_damping, + contract.actuator_gainprm, + contract.actuator_biasprm, + contract.joint_layout_kind, + contract.joint_layout_qpos_adr, + contract.joint_layout_qvel_adr, + contract.joint_layout_qpos_dim, + contract.joint_layout_qvel_dim, + contract.joint_layout_names, + contract.joint_layout_body_names, + contract.collision_filter_geom_names1, + contract.collision_filter_geom_names2, + sensor_frame_body_indices, + sensor_frame_offsets, + sensor_frame_ref_body_indices, + sensor_frame_ref_offsets, + contract.sensor_type, + contract.sensor_index, + contract.sensor_adr, + contract.sensor_dim, + contract.nsensordata, + nthread, + ) + return pool, qpos, qvel, state +""" + + +def test_batch_import_diagnostic_is_preserved() -> None: + output = _run_clean_python( + """ + import json + + try: + from drakeuni.batch_env import batch_available, batch_import_error + except ImportError as exc: + captured_error = exc + + def batch_available(): + return False + + def batch_import_error(): + return captured_error + + error = batch_import_error() + summary = { + "available": bool(batch_available()), + "error_type": None if error is None else type(error).__name__, + "missing_module": getattr(error, "name", None), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + summary = json.loads(output.strip().splitlines()[-1]) + if summary["available"]: + assert summary["error_type"] is None + assert summary["missing_module"] is None + else: + assert summary["error_type"] == "ModuleNotFoundError" + assert summary["missing_module"] == "drakeuni" + + +def test_drake_batch_thread_policy_matches_mujoco_auto(monkeypatch: pytest.MonkeyPatch) -> None: + from unilab.base.backend.drake import backend + + monkeypatch.setattr(backend, "cpu_count", lambda: 10) + + assert backend._resolve_batch_nthread(1024, 0) == 20 + assert backend._resolve_batch_nthread(8, 0) == 8 + assert 
backend._resolve_batch_nthread(1024, 4) == 4 + assert backend._resolve_batch_nthread(2, 8) == 2 + + +def test_batch_backend_mode_rejects_existing_pydrake_module() -> None: + output = _run_clean_python( + """ + import json + import sys + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + sys.modules["pydrake"] = object() + try: + create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="batch", + ) + except ImportError as exc: + message = str(exc) + else: + raise AssertionError("batch mode unexpectedly loaded with pydrake present") + assert "pydrake" in message + assert "fresh process" in message + print(json.dumps({"message": message}, sort_keys=True)) + """ + ) + assert "pydrake" in output + + +def test_direct_drake_backend_batch_mode_rejects_existing_pydrake_module() -> None: + output = _run_clean_python( + """ + import json + import sys + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend.drake.backend import DrakeBackend + from unilab.base.scene import SceneCfg + + sys.modules["pydrake"] = object() + try: + DrakeBackend( + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="batch", + ) + except ImportError as exc: + message = str(exc) + else: + raise AssertionError("direct batch DrakeBackend unexpectedly loaded with pydrake present") + assert "pydrake" in message + assert "fresh process" in message + print(json.dumps({"message": message}, sort_keys=True)) + """ + ) + assert "pydrake" in output + + +def test_create_backend_rejects_pydrake_mode() -> None: + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + with pytest.raises(ValueError, match="drake_backend_mode='batch'"): + create_backend( + "drake", + 
SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="pydrake", + ) + + +def test_direct_drake_backend_rejects_pydrake_mode() -> None: + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend.drake.backend import DrakeBackend + from unilab.base.scene import SceneCfg + + with pytest.raises(ValueError, match="drake_backend_mode='batch'"): + DrakeBackend( + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="pydrake", + ) + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_backend_constructs_without_task_base_name() -> None: + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + backend = create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="batch", + ) + assert type(backend).__name__ == "DrakeBackend" + + +@pytest.mark.skipif( + not _drakeuni_package_installed(), + reason="optional drakeuni package has not been installed", +) +def test_batch_package_direct_import_rejects_existing_pydrake_module() -> None: + output = _run_clean_python( + """ + import json + import sys + + sys.modules["pydrake"] = object() + from drakeuni.batch_env import ( + DrakeEnvPool, + batch_available, + batch_import_error, + ) + + error = batch_import_error() + assert DrakeEnvPool is None + assert not batch_available() + assert error is not None + assert "pydrake" in str(error) + print(json.dumps({"message": str(error)}, sort_keys=True)) + """ + ) + assert "pydrake" in output + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_batch_pool_imports_in_clean_process() -> None: + output = _run_clean_python( + """ + from drakeuni.batch_env 
import DrakeEnvPool, batch_available + assert batch_available() + assert DrakeEnvPool is not None + print(DrakeEnvPool.__name__) + """ + ) + assert "DrakeEnvPool" in output + + +@pytest.mark.skipif( + not _drakeuni_package_installed(), + reason="optional drakeuni package has not been installed", +) +def test_drakeuni_runtime_import_is_lazy_and_pydrake_free() -> None: + output = _run_clean_python( + """ + import json + import sys + + from drakeuni.runtime import DrakeBatchConfig + + summary = { + "config": DrakeBatchConfig.__name__, + "compiled_loaded": any(name.startswith("drakeuni.compiled") for name in sys.modules), + "pydrake_loaded": any( + name == "pydrake" or name.startswith("pydrake.") for name in sys.modules + ), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + assert json.loads(output.strip().splitlines()[-1]) == { + "config": "DrakeBatchConfig", + "compiled_loaded": False, + "pydrake_loaded": False, + } + + +def test_unilab_drake_public_surface_excludes_batch_backend_symbol() -> None: + output = _run_clean_python( + """ + import json + + import unilab.base.backend as backend_root + import unilab.base.backend.drake as drake_pkg + from unilab.base.backend.drake import backend as backend_module + + try: + from unilab.base.backend.drake.backend import DrakeUniBatchBackend # noqa: F401 + except ImportError: + direct_import = "failed" + else: + direct_import = "succeeded" + + summary = { + "direct_import": direct_import, + "root_has_batch": hasattr(backend_root, "DrakeUniBatchBackend"), + "subpackage_has_batch": hasattr(drake_pkg, "DrakeUniBatchBackend"), + "module_has_batch": hasattr(backend_module, "DrakeUniBatchBackend"), + "module_all_has_batch": "DrakeUniBatchBackend" in backend_module.__all__, + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + assert json.loads(output.strip().splitlines()[-1]) == { + "direct_import": "failed", + "module_all_has_batch": False, + "module_has_batch": False, + "root_has_batch": False, + 
"subpackage_has_batch": False, + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_batch_pool_go1_smoke_shapes_and_time() -> None: + output = _run_clean_python( + _GO1_POOL_HELPER + + textwrap.dedent( + """ + import json + + pool, qpos, _, state = make_go1_pool(2, 1) + control = np.tile(qpos[7:], (2, 1)) + state_only = pool.step(state, 2, control, None, False) + output = pool.step(state, 2, control, None, True) + sensor_data = output["sensor_data"] + summary = { + "forward_removed": not hasattr(pool, "forward"), + "state_only_has_sensor_data": "sensor_data" in state_only, + "state_shape": list(output["state"].shape), + "sensor_shape": list(sensor_data.shape), + "has_sensor_data": "sensor_data" in output, + "time": output["state"][:, 0].tolist(), + "state_finite": bool(np.all(np.isfinite(output["state"]))), + "sensor_finite": bool(np.all(np.isfinite(sensor_data))), + "nthread": pool.nthread, + "workspace_count": pool.workspace_count, + "num_filtered_geometries": pool.num_filtered_geometries, + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary.pop("num_filtered_geometries") > 0 + assert summary == { + "forward_removed": True, + "has_sensor_data": True, + "nthread": 1, + "sensor_finite": True, + "sensor_shape": [2, 42], + "state_finite": True, + "state_only_has_sensor_data": False, + "state_shape": [2, 38], + "time": [0.02, 0.02], + "workspace_count": 1, + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_batch_pool_uses_thread_workspaces_not_env_workspaces() -> None: + output = _run_clean_python( + _GO1_POOL_HELPER + + textwrap.dedent( + """ + import json + + _, qpos, qvel, state = make_go1_pool(4, 1) + for env_index in range(4): + row_qpos = qpos.copy() + row_qpos[0] += 0.05 * env_index + state[env_index, 
1 : 1 + qpos.size] = row_qpos + state[env_index, 1 + qpos.size :] = qvel + + def make_pool(nthread): + return make_go1_pool(4, nthread)[0] + + control = np.tile(qpos[7:], (4, 1)) + serial_pool = make_pool(1) + threaded_pool = make_pool(2) + serial = serial_pool.step(state, 2, control, None, True) + threaded = threaded_pool.step(state, 2, control, None, True) + serial_sensor = serial["sensor_data"] + threaded_sensor = threaded["sensor_data"] + threaded_snapshot = threaded_pool.snapshot(True) + + reset_state = state[[2]].copy() + reset_state[0, 1] = 1.23 + reset_snapshot = threaded_pool.reset(np.array([2], dtype=np.int32), reset_state, True) + + summary = { + "serial_workspace_count": serial_pool.workspace_count, + "threaded_workspace_count": threaded_pool.workspace_count, + "parity_state": bool(np.allclose(serial["state"], threaded["state"])), + "parity_sensor": bool(np.allclose(serial_sensor, threaded_sensor)), + "snapshot_state": bool(np.allclose(threaded["state"], threaded_snapshot["state"])), + "snapshot_sensor": bool(np.allclose(threaded_sensor, threaded_snapshot["sensor_data"])), + "reset_sensor_finite": bool(np.all(np.isfinite(reset_snapshot["sensor_data"]))), + "reset_sensor_shape": list(reset_snapshot["sensor_data"].shape), + "reset_times": reset_snapshot["state"][:, 0].round(6).tolist(), + "reset_x": reset_snapshot["state"][:, 1].round(6).tolist(), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary == { + "parity_sensor": True, + "parity_state": True, + "reset_sensor_finite": True, + "reset_sensor_shape": [1, 42], + "reset_times": [0.0], + "reset_x": [1.23], + "serial_workspace_count": 1, + "snapshot_sensor": True, + "snapshot_state": True, + "threaded_workspace_count": 2, + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_batch_pool_worker_exception_reaches_python() -> None: + 
output = _run_clean_python( + _GO1_POOL_HELPER + + textwrap.dedent( + """ + import json + + pool, qpos, _, state = make_go1_pool(4, 2) + state[2, 0] = np.nan + control = np.tile(qpos[7:], (4, 1)) + try: + pool.step(state, 1, control, None, True) + except Exception as exc: + summary = {"error_type": type(exc).__name__, "message": str(exc)} + else: + raise AssertionError("non-finite worker state unexpectedly succeeded") + print(json.dumps(summary, sort_keys=True)) + """ + ) + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary["error_type"] == "ValueError" + assert "non-finite" in summary["message"] + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_runtime_stewart_compact_state_matches_mujoco_qpos_order() -> None: + output = _run_clean_python( + """ + import json + + import mujoco + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from drakeuni.runtime import DrakeBatchConfig, create_runtime + + model = ASSETS_ROOT_PATH / "robots/stewart/scene.xml" + runtime = create_runtime( + DrakeBatchConfig(model_file=str(model), num_envs=1, sim_dt=0.005, nthread=1) + ) + drake_home = runtime.model_info().home_qpos + mujoco_home = mujoco.MjModel.from_xml_path(str(model)).qpos0 + summary = { + "shape": list(drake_home.shape), + "matches_mujoco": bool(np.allclose(drake_home, mujoco_home)), + "ball_z": float(drake_home[2]), + "top_z": float(drake_home[9]), + "max_abs_diff": float(np.max(np.abs(drake_home - mujoco_home))), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + assert json.loads(output.strip().splitlines()[-1]) == { + "ball_z": 1.18, + "matches_mujoco": True, + "max_abs_diff": 0.0, + "shape": [32], + "top_z": 1.0, + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_runtime_stewart_ball_collides_with_top_plate() -> None: + output = 
_run_clean_python( + """ + import json + + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from drakeuni.runtime import DrakeBatchConfig, create_runtime + + model = ASSETS_ROOT_PATH / "robots/stewart/scene.xml" + runtime = create_runtime( + DrakeBatchConfig(model_file=str(model), num_envs=1, sim_dt=0.005, nthread=1) + ) + info = runtime.model_info() + ball_id, top_id = runtime.body_ids(["ball", "top"]) + control = np.zeros((1, info.nu), dtype=np.float64) + runtime.step(control, 720) + body_state = runtime.compute_body_state([int(ball_id), int(top_id)]) + ball_z = float(body_state["pos"][0, 0, 2]) + top_z = float(body_state["pos"][0, 1, 2]) + summary = { + "ball_z": round(ball_z, 6), + "top_z": round(top_z, 6), + "gap": round(ball_z - top_z, 6), + "num_filtered_geometries": runtime.diagnostics().num_filtered_geometries, + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary["num_filtered_geometries"] == 0 + assert summary["gap"] > 0.15 + assert summary["ball_z"] > 1.1 + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_create_backend_batch_mode_avoids_pydrake_and_steps() -> None: + output = _run_clean_python( + """ + import json + import sys + + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + assert "pydrake" not in sys.modules + backend = create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 2, + 0.01, + drake_backend_mode="batch", + drake_nthread=2, + ) + assert "pydrake" not in sys.modules + qpos = np.stack([backend.get_keyframe_qpos("home") for _ in range(2)]) + qvel = np.stack([backend.get_init_qvel() for _ in range(2)]) + backend.set_state(np.arange(2, dtype=np.int32), qpos, qvel) + backend.step(backend.get_dof_pos(), 
nsteps=2) + for _ in range(100): + backend.step(backend.get_dof_pos(), nsteps=1) + diagnostics = backend.diagnostics() + foot_contact = backend.get_sensor_data("FL_foot_contact") + body_ids = backend.get_body_ids(["trunk", "FR_calf"]) + body_pos = backend.get_body_pos_w(body_ids) + summary = { + "cls": type(backend).__name__, + "state_shape": list(backend.get_physics_state().shape), + "base_shape": list(backend.get_base_pos().shape), + "body_shape": list(body_pos.shape), + "body_finite": bool(np.all(np.isfinite(body_pos))), + "foot_shape": list(backend.get_sensor_data("FL_pos").shape), + "contact_shape": list(foot_contact.shape), + "contact_nonzero": bool(np.max(np.abs(foot_contact)) > 0.0), + "diagnostic_mode": diagnostics.mode, + "nthread": backend.nthread, + "workspace_count": diagnostics.workspace_count, + "time": np.round(backend.get_physics_state()[:, 0], decimals=6).tolist(), + "pydrake_loaded": "pydrake" in sys.modules, + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary == { + "base_shape": [2, 3], + "body_finite": True, + "body_shape": [2, 2, 3], + "cls": "DrakeBackend", + "contact_shape": [2, 3], + "contact_nonzero": True, + "diagnostic_mode": "batch", + "foot_shape": [2, 3], + "nthread": 2, + "pydrake_loaded": False, + "state_shape": [2, 38], + "time": [1.02, 1.02], + "workspace_count": 2, + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_backend_pre_step_hook_refreshes_between_substeps() -> None: + output = _run_clean_python( + """ + import json + + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + backend = create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="batch", + drake_nthread=1, + ) 
+ qpos = np.stack([backend.get_keyframe_qpos("home")]) + qvel = np.stack([backend.get_init_qvel()]) + backend.set_state(np.array([0], dtype=np.int32), qpos, qvel) + seen_times = [] + + def hook(backend_obj, policy_ctrl): + seen_times.append(round(float(backend_obj.get_physics_state()[0, 0]), 6)) + return policy_ctrl + + backend.set_pre_step_control(hook) + backend.step(backend.get_dof_pos(), nsteps=3) + summary = { + "seen_times": seen_times, + "final_time": round(float(backend.get_physics_state()[0, 0]), 6), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + assert json.loads(output.strip().splitlines()[-1]) == { + "final_time": 0.03, + "seen_times": [0.0, 0.01, 0.02], + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_drake_backend_body_frame_getters_use_compact_root_frame() -> None: + output = _run_clean_python( + """ + import json + + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + backend = create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go1/scene_flat.xml")), + 1, + 0.01, + drake_backend_mode="batch", + drake_nthread=1, + ) + base_id = backend.get_body_ids(["trunk"]) + other_id = backend.get_body_ids(["FR_calf"]) + base_pos = backend.get_body_pos_b(base_id) + base_quat = backend.get_body_quat_b(base_id) + other_pos = backend.get_body_pos_b(other_id) + other_quat = backend.get_body_quat_b(other_id) + other_linvel = backend.get_body_lin_vel_b(other_id) + other_angvel = backend.get_body_ang_vel_b(other_id) + summary = { + "base_pos_shape": list(base_pos.shape), + "base_pos_zero": bool(np.allclose(base_pos, 0.0)), + "base_quat": base_quat.round(6).tolist(), + "other_pos_shape": list(other_pos.shape), + "other_quat_shape": list(other_quat.shape), + "other_finite": bool( + np.all(np.isfinite(other_pos)) + and 
np.all(np.isfinite(other_quat)) + and np.all(np.isfinite(other_linvel)) + and np.all(np.isfinite(other_angvel)) + ), + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + summary = json.loads(output.strip().splitlines()[-1]) + assert summary == { + "base_pos_shape": [1, 1, 3], + "base_pos_zero": True, + "base_quat": [[[1.0, 0.0, 0.0, 0.0]]], + "other_finite": True, + "other_pos_shape": [1, 1, 3], + "other_quat_shape": [1, 1, 4], + } + + +@pytest.mark.skipif( + not _batch_extension_built(), + reason="optional Drake batch extension has not been built", +) +def test_create_go2_backend_batch_mode_avoids_pydrake_and_steps() -> None: + output = _run_clean_python( + """ + import json + import sys + + import numpy as np + + from unilab.assets import ASSETS_ROOT_PATH + from unilab.base.backend import create_backend + from unilab.base.scene import SceneCfg + + assert "pydrake" not in sys.modules + backend = create_backend( + "drake", + SceneCfg(model_file=str(ASSETS_ROOT_PATH / "robots/go2/scene_flat.xml")), + 2, + 0.01, + drake_backend_mode="batch", + drake_nthread=2, + ) + assert "pydrake" not in sys.modules + qpos = np.stack([backend.get_keyframe_qpos("home") for _ in range(2)]) + qvel = np.stack([backend.get_init_qvel() for _ in range(2)]) + backend.set_state(np.arange(2, dtype=np.int32), qpos, qvel) + backend.step(backend.get_dof_pos(), nsteps=2) + diagnostics = backend.diagnostics() + summary = { + "cls": type(backend).__name__, + "state_shape": list(backend.get_physics_state().shape), + "base_shape": list(backend.get_base_pos().shape), + "foot_shape": list(backend.get_sensor_data("FL_pos").shape), + "contact_shape": list(backend.get_sensor_data("FL_foot_contact").shape), + "diagnostic_mode": diagnostics.mode, + "nthread": backend.nthread, + "workspace_count": diagnostics.workspace_count, + "time": backend.get_physics_state()[:, 0].tolist(), + "pydrake_loaded": "pydrake" in sys.modules, + } + print(json.dumps(summary, sort_keys=True)) + """ + ) + summary = 
json.loads(output.strip().splitlines()[-1]) + assert summary == { + "base_shape": [2, 3], + "cls": "DrakeBackend", + "contact_shape": [2, 1], + "diagnostic_mode": "batch", + "foot_shape": [2, 3], + "nthread": 2, + "pydrake_loaded": False, + "state_shape": [2, 38], + "time": [0.02, 0.02], + "workspace_count": 2, + } diff --git a/tests/envs/locomotion/test_go2_footstand.py b/tests/envs/locomotion/test_go2_footstand.py index 1e3f4890d..ca25ade9c 100644 --- a/tests/envs/locomotion/test_go2_footstand.py +++ b/tests/envs/locomotion/test_go2_footstand.py @@ -19,12 +19,12 @@ ) -def test_go2_footstand_registers_mujoco_only() -> None: +def test_go2_footstand_registers_supported_backends() -> None: ensure_registries() meta = registry.list_registered_envs()["Go2FootStand"] - assert meta["available_backends"] == ["mujoco"] + assert meta["available_backends"] == ["drake", "mujoco"] class _OrientationBackend: diff --git a/tests/envs/test_sharpa.py b/tests/envs/test_sharpa.py index 9820b1d6c..3726c2d4c 100644 --- a/tests/envs/test_sharpa.py +++ b/tests/envs/test_sharpa.py @@ -68,7 +68,11 @@ def test_sharpa_registry_exposes_rotation_and_grasp_motrix() -> None: registered = registry.list_registered_envs() - assert set(registered["SharpaInhandRotation"]["available_backends"]) == {"mujoco", "motrix"} + assert set(registered["SharpaInhandRotation"]["available_backends"]) == { + "drake", + "mujoco", + "motrix", + } assert set(registered["SharpaInhandRotationGrasp"]["available_backends"]) == { "mujoco", "motrix", diff --git a/tests/envs/test_stewart.py b/tests/envs/test_stewart.py index 58b4a2d36..f59d967c0 100644 --- a/tests/envs/test_stewart.py +++ b/tests/envs/test_stewart.py @@ -37,9 +37,13 @@ def test_stewart_registered_backends() -> None: ensure_registries() registered = registry.list_registered_envs() assert "StewartBalance" in registered - # motrix is the validated training backend; mujoco is construct/step-capable - # (closed-loop stability tuning for mujoco is a follow-up). 
- assert set(registered["StewartBalance"]["available_backends"]) == {"motrix", "mujoco"} + # motrix is the original validated backend; mujoco and drake are + # construct/step-capable comparison backends. + assert set(registered["StewartBalance"]["available_backends"]) == { + "drake", + "motrix", + "mujoco", + } def test_stewart_motrix_owner_cfg_composes() -> None: diff --git a/tests/ipc/test_async_runner.py b/tests/ipc/test_async_runner.py index 9c4b4c11a..c772ca4eb 100644 --- a/tests/ipc/test_async_runner.py +++ b/tests/ipc/test_async_runner.py @@ -223,9 +223,11 @@ def test_start_collector_does_not_merge_runner_runtime_fields(): "token": "ok", }, ) - payload = report_queue.get(timeout=5) - assert payload == {"sim_backend": "missing", "token": "ok"} - r.close() + try: + payload = report_queue.get(timeout=30) + assert payload == {"sim_backend": "missing", "token": "ok"} + finally: + r.close() def test_format_collector_death_reports_shell_style_sigbus(): diff --git a/tests/scripts/test_drake_training_smoke.py b/tests/scripts/test_drake_training_smoke.py new file mode 100644 index 000000000..1954e1945 --- /dev/null +++ b/tests/scripts/test_drake_training_smoke.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import importlib.util +import subprocess +import sys +from pathlib import Path + +import pytest + +ROOT_DIR = Path(__file__).resolve().parents[2] + + +def _module_available(name: str) -> bool: + try: + return importlib.util.find_spec(name) is not None + except ModuleNotFoundError: + return False + + +def _drake_batch_available() -> bool: + return _module_available("drakeuni.compiled._drake_env_pool") + + +@pytest.mark.slow +@pytest.mark.skipif( + not _drake_batch_available(), + reason="optional DrakeUni batch extension has not been built", +) +@pytest.mark.parametrize("task", ["go1_joystick_flat/drake", "go2_joystick_flat/drake"]) +def test_drake_ppo_one_iteration_training_smoke(task: str, tmp_path: Path) -> None: + """Drake task configs can run the real 
RSL-RL training entry point.""" + result = subprocess.run( + [ + sys.executable, + "scripts/train_rsl_rl.py", + f"task={task}", + "training.no_play=true", + f"training.log_root={tmp_path / 'logs'}", + "algo.num_envs=4", + "algo.num_steps_per_env=4", + "algo.max_iterations=1", + "algo.save_interval=100", + "env.drake_nthread=1", + ], + cwd=ROOT_DIR, + capture_output=True, + text=True, + timeout=180, + ) + + assert result.returncode == 0, ( + f"Drake PPO smoke failed for {task}:\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" + ) + assert "Learning iteration 0/1" in result.stdout diff --git a/tests/scripts/test_train_scripts.py b/tests/scripts/test_train_scripts.py index 1c787e42e..f9abf228e 100644 --- a/tests/scripts/test_train_scripts.py +++ b/tests/scripts/test_train_scripts.py @@ -695,6 +695,33 @@ def test_ppo_task_go2_aligns_mujoco_with_motrix_defaults(): assert cfg.algo.algorithm.entropy_coef == pytest.approx(1.0e-3) +def test_ppo_go1_drake_batch_config_matches_current_contact_support(): + cfg = _ppo_cfg(["task=go1_joystick_flat/drake"]) + + assert cfg.env.drake_backend_mode == "batch" + assert cfg.env.drake_nthread == 0 + assert "contact" not in cfg.reward.scales + + +def test_ppo_go2_drake_batch_config_matches_go2_training_defaults(): + cfg = _ppo_cfg(["task=go2_joystick_flat/drake"]) + + assert cfg.training.task_name == "Go2JoystickFlat" + assert cfg.training.sim_backend == "drake" + assert cfg.algo.num_envs == 1024 + assert cfg.algo.max_iterations == 151 + assert cfg.algo.empirical_normalization is True + assert cfg.algo.policy.init_noise_std == pytest.approx(0.5) + assert cfg.algo.algorithm.learning_rate == pytest.approx(3.0e-4) + assert cfg.algo.algorithm.entropy_coef == pytest.approx(1.0e-3) + assert cfg.env.drake_backend_mode == "batch" + assert cfg.env.drake_nthread == 0 + assert cfg.env.scene.model_file == "src/unilab/assets/robots/go2/scene_flat.xml" + assert cfg.env.domain_rand.randomize_kp is False + assert cfg.env.domain_rand.randomize_kd is 
False + assert cfg.reward.scales.contact == pytest.approx(0.24) + + def test_build_ppo_env_cfg_override_go1_motrix( monkeypatch: pytest.MonkeyPatch, ):