From c4bff71854d1b8f5c319a01866e338ba38f02f4f Mon Sep 17 00:00:00 2001
From: Xuehai Pan <XuehaiPan@pku.edu.cn>
Date: Thu, 26 Dec 2024 16:48:42 +0800
Subject: [PATCH] [Easy] Add ROCm support to nightly pull tool (#141282)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141282
Approved by: https://github.com/malfet
ghstack dependencies: #143263
---
 .../scripts/generate_binary_build_matrix.py   |  1 +
 CONTRIBUTING.md                               | 11 ++-
 Makefile                                      |  4 ++
 tools/nightly.py                              | 68 +++++++++++++------
 4 files changed, 64 insertions(+), 20 deletions(-)
diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
index 2eb7b8a80b49b..eee9c6581de98 100644
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@@ -20,6 +20,7 @@
 CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.4": "12.4.1", "12.6": "12.6.3"}
 CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.4": "9", "12.6": "9"}
 
+# NOTE: Keep the ROCm entries of PIP_SOURCES in tools/nightly.py in sync when changing this list
 ROCM_ARCHES = ["6.2.4", "6.3"]
 
 XPU_ARCHES = ["xpu"]
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index da8298ba80f11..9e5b64270bef7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,7 +78,9 @@ git clone git@github.com:<USERNAME>/pytorch.git
 cd pytorch
 git remote add upstream git@github.com:pytorch/pytorch.git
 
-make setup-env  # or make setup-env-cuda for pre-built CUDA binaries
+make setup-env
+# Or run `make setup-env-cuda` for pre-built CUDA binaries
+# Or run `make setup-env-rocm` for pre-built ROCm binaries (Linux only)
 source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows
 ```
 
@@ -193,6 +195,13 @@ To install the nightly binaries built with CUDA, you can pass in the flag `--cud
 source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows
 ```
 
+To install the nightly binaries built with ROCm (Linux only), you can pass in the flag `--rocm`:
+
+```bash
+./tools/nightly.py checkout -b my-nightly-branch --rocm
+source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows
+```
+
 You can also use this tool to pull the nightly commits into the current branch:
 
 ```bash
diff --git a/Makefile b/Makefile
index 8331bb6f68a81..e5b4386b5dd22 100644
--- a/Makefile
+++ b/Makefile
@@ -35,8 +35,12 @@ setup-env: ensure-branch-clean
 setup-env-cuda:
 	$(MAKE) setup-env PYTHON="$(PYTHON)" NIGHTLY_TOOL_OPTS="$(NIGHTLY_TOOL_OPTS) --cuda"
 
+setup-env-rocm:
+	$(MAKE) setup-env PYTHON="$(PYTHON)" NIGHTLY_TOOL_OPTS="$(NIGHTLY_TOOL_OPTS) --rocm"
+
 setup_env: setup-env
 setup_env_cuda: setup-env-cuda
+setup_env_rocm: setup-env-rocm
 
 setup-lint:
 	$(PIP) install lintrunner
diff --git a/tools/nightly.py b/tools/nightly.py
index 80341008d6e33..3a169deed2d1b 100755
--- a/tools/nightly.py
+++ b/tools/nightly.py
@@ -20,6 +20,11 @@
     $ ./tools/nightly.py checkout -b my-nightly-branch --cuda
     $ source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows
 
+To install the nightly binaries built with ROCm (Linux only), you can pass in the flag --rocm::
+
+    $ ./tools/nightly.py checkout -b my-nightly-branch --rocm
+    $ source venv/bin/activate  # or `& .\venv\Scripts\Activate.ps1` on Windows
+
 You can also use this tool to pull the nightly commits into the current branch as
 well. This can be done with::
 
@@ -134,6 +139,12 @@ class PipSource(NamedTuple):
         supported_platforms={"Linux", "Windows"},
         accelerator="cuda",
     ),
+    "rocm-6.2.4": PipSource(
+        name="rocm-6.2.4",
+        index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/rocm6.2.4",
+        supported_platforms={"Linux"},
+        accelerator="rocm",
+    ),
 }
 
 
@@ -882,6 +893,17 @@ def find_executable(name: str) -> Path:
             default=argparse.SUPPRESS,
             metavar="VERSION",
         )
+        subparser.add_argument(
+            "--rocm",
+            help=(
+                "ROCm version to install "
+                "(defaults to the latest version available on the platform)"
+            ),
+            dest="rocm",
+            nargs="?",
+            default=argparse.SUPPRESS,
+            metavar="VERSION",
+        )
     return parser
 
 
@@ -889,6 +911,8 @@ def parse_arguments() -> argparse.Namespace:
     parser = make_parser()
     args = parser.parse_args()
     args.branch = getattr(args, "branch", None)
+    if hasattr(args, "cuda") and hasattr(args, "rocm"):
+        parser.error("Cannot specify both CUDA and ROCm versions.")
     return args
 
 
@@ -901,26 +925,32 @@ def main() -> None:
         sys.exit(status)
 
     pip_source = None
-    if hasattr(args, "cuda"):
-        available_sources = {
-            src.name[len("cuda-") :]: src
-            for src in PIP_SOURCES.values()
-            if src.name.startswith("cuda-") and PLATFORM in src.supported_platforms
-        }
-        if not available_sources:
-            print(f"No CUDA versions available on platform {PLATFORM}.")
-            sys.exit(1)
-        if args.cuda is not None:
-            pip_source = available_sources.get(args.cuda)
-            if pip_source is None:
-                print(
-                    f"CUDA {args.cuda} is not available on platform {PLATFORM}. "
-                    f"Available version(s): {', '.join(sorted(available_sources, key=Version))}"
-                )
+
+    for toolkit in ("CUDA", "ROCm"):
+        accel = toolkit.lower()
+        if hasattr(args, accel):
+            requested = getattr(args, accel)
+            available_sources = {
+                src.name[len(f"{accel}-") :]: src
+                for src in PIP_SOURCES.values()
+                if src.name.startswith(f"{accel}-")
+                and PLATFORM in src.supported_platforms
+            }
+            if not available_sources:
+                print(f"No {toolkit} versions available on platform {PLATFORM}.")
                 sys.exit(1)
-        else:
-            pip_source = available_sources[max(available_sources, key=Version)]
-    else:
+            if requested is not None:
+                pip_source = available_sources.get(requested)
+                if pip_source is None:
+                    print(
+                        f"{toolkit} {requested} is not available on platform {PLATFORM}. "
+                        f"Available version(s): {', '.join(sorted(available_sources, key=Version))}"
+                    )
+                    sys.exit(1)
+            else:
+                pip_source = available_sources[max(available_sources, key=Version)]
+
+    if pip_source is None:
         pip_source = PIP_SOURCES["cpu"]  # always available
 
     with logging_manager(debug=args.verbose) as logger: