diff --git a/docs/source/features/multi_gpu.rst b/docs/source/features/multi_gpu.rst index a76db175e85..2537e5eff25 100644 --- a/docs/source/features/multi_gpu.rst +++ b/docs/source/features/multi_gpu.rst @@ -141,14 +141,14 @@ For the master node, use the following command, where ``--nproc_per_node`` repre .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=localhost:5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --master_addr=localhost --master_port=5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. tab-item:: rsl_rl :sync: rsl_rl .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=localhost:5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --master_addr=localhost --master_port=5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. tab-item:: skrl :sync: skrl @@ -160,7 +160,7 @@ For the master node, use the following command, where ``--nproc_per_node`` repre .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=localhost:5555 scripts/reinforcement_learning/skrl/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --master_addr=localhost --master_port=5555 scripts/reinforcement_learning/skrl/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. 
tab-item:: JAX :sync: jax @@ -181,14 +181,14 @@ For non-master nodes, use the following command, replacing ``--node_rank`` with .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=ip_of_master_machine:5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=ip_of_master_machine --master_port=5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. tab-item:: rsl_rl :sync: rsl_rl .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=ip_of_master_machine:5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=ip_of_master_machine --master_port=5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. tab-item:: skrl :sync: skrl @@ -200,7 +200,7 @@ For non-master nodes, use the following command, replacing ``--node_rank`` with .. code-block:: shell - python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=ip_of_master_machine:5555 scripts/reinforcement_learning/skrl/train.py --task=Isaac-Cartpole-v0 --headless --distributed + python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=ip_of_master_machine --master_port=5555 scripts/reinforcement_learning/skrl/train.py --task=Isaac-Cartpole-v0 --headless --distributed .. 
tab-item:: JAX :sync: jax diff --git a/docs/source/setup/installation/index.rst b/docs/source/setup/installation/index.rst index 3c9971cab73..b8b794ec237 100644 --- a/docs/source/setup/installation/index.rst +++ b/docs/source/setup/installation/index.rst @@ -89,8 +89,6 @@ Other notable limitations with respect to Isaac Lab include... #. Livestream and Hub Workstation Cache are not supported on the DGX spark. -#. Multi-node training may require direct connections between Spark machines or additional network configurations. - #. :ref:`Running Cosmos Transfer1 ` is not currently supported on the DGX Spark. Troubleshooting