
Commit 35615a5

Merge remote-tracking branch 'upstream/main' into rocm7.1_internal_testing_IFU_2025-10-27

# Conflicts:
#   .ci/docker/build.sh
#   .ci/docker/ci_commit_pins/triton.txt
#   .ci/docker/libtorch/build.sh
#   CMakeLists.txt
#   aten/src/ATen/native/sparse/cuda/SparseMatMul.cu
#   requirements-build.txt
#   test/dynamo/test_structured_trace.py
#   test/inductor/test_cuda_repro.py
#   test/inductor/test_decompose_mem_bound_mm.py
#   test/inductor/test_max_autotune.py
#   test/test_linalg.py
#   test/test_matmul_cuda.py
#   torch/_inductor/runtime/coordinate_descent_tuner.py
#   torch/_inductor/runtime/triton_heuristics.py
#   torch/testing/_internal/common_utils.py
2 parents dd94311 + ee7434b commit 35615a5

File tree

2,947 files changed: +95,199 −40,312 lines

Large commits have some content hidden by default; only the diffs captured below are shown.

.ci/aarch64_linux/aarch64_ci_build.sh

Lines changed: 4 additions & 0 deletions
@@ -8,13 +8,17 @@ if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
     export TORCH_CUDA_ARCH_LIST="8.0;9.0"
 elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
+elif [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
+    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
 elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
 fi
 
 # Compress the fatbin with -compress-mode=size for CUDA 13
 if [[ "$DESIRED_CUDA" == *"13"* ]]; then
     export TORCH_NVCC_FLAGS="-compress-mode=size"
+    # Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
+    export BUILD_BUNDLE_PTXAS=1
 fi
 
 SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
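Net effect: a CUDA 12.9 aarch64 build now selects the same architecture list as 12.8, and CUDA 13 wheels additionally bundle ptxas. A condensed sketch of the new dispatch (illustrative only; GPU_ARCH_VERSION is normally supplied by the CI environment):

    # illustrative: what a cu129 aarch64 build exports after this change
    GPU_ARCH_VERSION="12.9"
    if [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
        export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"  # same list as 12.8
    fi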

.ci/aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 4 additions & 53 deletions
@@ -13,49 +13,6 @@ def list_dir(path: str) -> list[str]:
     return check_output(["ls", "-1", path]).decode().split("\n")
 
 
-def build_ArmComputeLibrary() -> None:
-    """
-    Using ArmComputeLibrary for aarch64 PyTorch
-    """
-    print("Building Arm Compute Library")
-    acl_build_flags = [
-        "debug=0",
-        "neon=1",
-        "opencl=0",
-        "os=linux",
-        "openmp=1",
-        "cppthreads=0",
-        "arch=armv8a",
-        "multi_isa=1",
-        "fixed_format_kernels=1",
-        "build=native",
-    ]
-    acl_install_dir = "/acl"
-    acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary")
-    if os.path.isdir(acl_install_dir):
-        shutil.rmtree(acl_install_dir)
-    if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)):
-        check_call(
-            [
-                "git",
-                "clone",
-                "https://github.com/ARM-software/ComputeLibrary.git",
-                "-b",
-                "v25.02",
-                "--depth",
-                "1",
-                "--shallow-submodules",
-            ]
-        )
-
-    check_call(
-        ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags,
-        cwd=acl_checkout_dir,
-    )
-    for d in ["arm_compute", "include", "utils", "support", "src", "build"]:
-        shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
-
-
 def replace_tag(filename) -> None:
     with open(filename) as f:
         lines = f.readlines()
@@ -356,23 +313,17 @@ def parse_arguments():
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
 
     if enable_mkldnn:
-        build_ArmComputeLibrary()
         print("build pytorch with mkldnn+acl backend")
-        build_vars += (
-            "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
-            "ACL_ROOT_DIR=/acl "
-            "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
-            "ACL_INCLUDE_DIR=/acl/build "
-            "ACL_LIBRARY=/acl/build "
-        )
+        build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
+        build_vars += "ACL_ROOT_DIR=/acl "
         if enable_cuda:
            build_vars += "BLAS=NVPL "
        else:
-            build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS "
+            build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS "
     else:
         print("build pytorch without mkldnn backend")
 
-    os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
+    os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation")
     if enable_cuda:
         print("Updating Cuda Dependency")
         filename = os.listdir("/pytorch/dist/")
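This file picks up the tree-wide migration from the deprecated setup.py entry point to the PEP 517 build front-end. A minimal sketch of the equivalence (environment variables abbreviated; --no-isolation assumes the build dependencies are already installed in the environment):

    # old, deprecated direct invocation
    cd /pytorch && USE_MKLDNN=ON python3 setup.py bdist_wheel
    # new front-end; produces the wheel in /pytorch/dist as before
    cd /pytorch && USE_MKLDNN=ON python3 -m build --wheel --no-isolation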

.ci/aarch64_linux/build_aarch64_wheel.py

Lines changed: 15 additions & 45 deletions
@@ -299,40 +299,6 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None:
     )
 
 
-def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None:
-    print("Building OpenBLAS")
-    host.run_cmd(
-        f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}"
-    )
-    make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8"
-    host.run_cmd(
-        f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS"
-    )
-
-
-def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None:
-    print("Building Arm Compute Library")
-    acl_build_flags = " ".join(
-        [
-            "debug=0",
-            "neon=1",
-            "opencl=0",
-            "os=linux",
-            "openmp=1",
-            "cppthreads=0",
-            "arch=armv8a",
-            "multi_isa=1",
-            "fixed_format_kernels=1",
-            "build=native",
-        ]
-    )
-    host.run_cmd(
-        f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
-    )
-
-    host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
-
-
 def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None:
     host.run_cmd("pip3 install auditwheel")
     host.run_cmd(
@@ -442,7 +408,7 @@ def build_torchvision(
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd vision && {build_vars} python3 -m build --wheel --no-isolation")
     vision_wheel_name = host.list_dir("vision/dist")[0]
     embed_libgomp(host, use_conda, os.path.join("vision", "dist", vision_wheel_name))
 
@@ -497,7 +463,7 @@ def build_torchdata(
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd data && {build_vars} python3 -m build --wheel --no-isolation")
     wheel_name = host.list_dir("data/dist")[0]
     embed_libgomp(host, use_conda, os.path.join("data", "dist", wheel_name))
 
@@ -553,7 +519,7 @@ def build_torchtext(
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
 
-    host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel")
+    host.run_cmd(f"cd text && {build_vars} python3 -m build --wheel --no-isolation")
     wheel_name = host.list_dir("text/dist")[0]
     embed_libgomp(host, use_conda, os.path.join("text", "dist", wheel_name))
 
@@ -614,7 +580,7 @@ def build_torchaudio(
     host.run_cmd(
         f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
         && ./packaging/ffmpeg/build.sh \
-        && {build_vars} python3 setup.py bdist_wheel"
+        && {build_vars} python3 -m build --wheel --no-isolation"
     )
 
     wheel_name = host.list_dir("audio/dist")[0]
@@ -700,7 +666,6 @@ def start_build(
     configure_system(
         host, compiler=compiler, use_conda=use_conda, python_version=python_version
     )
-    build_OpenBLAS(host, git_clone_flags)
 
     if host.using_docker():
         print("Move libgfortant.a into a standard location")
@@ -723,10 +688,12 @@ def start_build(
         f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}"
     )
 
+    host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh")
+
     print("Building PyTorch wheel")
     build_opts = ""
     if pytorch_build_number is not None:
-        build_opts += f" --build-number {pytorch_build_number}"
+        build_opts += f" -C--build-option=--build-number={pytorch_build_number}"
     # Breakpad build fails on aarch64
     build_vars = "USE_BREAKPAD=0 "
     if branch == "nightly":
@@ -743,15 +710,18 @@ def start_build(
     if host.using_docker():
         build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
     if enable_mkldnn:
-        build_ArmComputeLibrary(host, git_clone_flags)
+        host.run_cmd("pytorch/.ci/docker/common/install_acl.sh")
         print("build pytorch with mkldnn+acl backend")
         build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
+        build_vars += " BLAS=OpenBLAS"
+        build_vars += " OpenBLAS_HOME=/opt/OpenBLAS"
+        build_vars += " ACL_ROOT_DIR=/acl"
         host.run_cmd(
-            f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}"
+            f"cd $HOME/pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
         )
         print("Repair the wheel")
         pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
-        ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib"
+        ld_library_path = "/acl/build:$HOME/pytorch/build/lib"
         host.run_cmd(
             f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}"
         )
@@ -763,7 +733,7 @@ def start_build(
     else:
         print("build pytorch without mkldnn backend")
         host.run_cmd(
-            f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}"
+            f"cd pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
         )
 
     print("Deleting build folder")
@@ -907,7 +877,7 @@ def terminate_instances(instance_type: str) -> None:
 def parse_arguments():
     from argparse import ArgumentParser
 
-    parser = ArgumentParser("Builid and test AARCH64 wheels using EC2")
+    parser = ArgumentParser("Build and test AARCH64 wheels using EC2")
     parser.add_argument("--key-name", type=str)
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--build-only", action="store_true")

.ci/docker/almalinux/Dockerfile

Lines changed: 2 additions & 1 deletion
@@ -69,7 +69,8 @@ RUN bash ./install_cuda.sh 13.0
 ENV DESIRED_CUDA=13.0
 
 FROM ${ROCM_IMAGE} as rocm
-ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+ARG PYTORCH_ROCM_ARCH
+ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 ENV MKLROOT /opt/intel

.ci/docker/almalinux/build.sh

Lines changed: 6 additions & 0 deletions
@@ -36,6 +36,12 @@ case ${DOCKER_TAG_PREFIX} in
     ;;
   rocm*)
     BASE_TARGET=rocm
+    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
+    # add gfx950, gfx115x conditionally starting in ROCm 7.0
+    if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
+      PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
+    fi
+    EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
     ;;
   *)
     echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"

.ci/docker/build.sh

Lines changed: 10 additions & 16 deletions
@@ -88,8 +88,8 @@ fi
 _UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
 _UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
 if [[ "$image" == *rocm* ]]; then
-  _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
-  _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
+  _UCX_COMMIT=29831d319e6be55cb8c768ca61de335c934ca39e
+  _UCC_COMMIT=9f4b242cbbd8b1462cbc732eb29316cdfa124b77
 fi
 
 tag=$(echo $image | awk -F':' '{print $2}')
@@ -117,6 +117,7 @@ case "$tag" in
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
     TRITON=yes
+    INSTALL_MINGW=yes
     ;;
   pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
     CUDA_VERSION=13.0.0
@@ -179,28 +180,17 @@ case "$tag" in
     fi
     GCC_VERSION=11
     VISION=yes
-    ROCM_VERSION=6.4
+    ROCM_VERSION=7.0
     NINJA_VERSION=1.9.0
     TRITON=yes
     KATEX=yes
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
+    PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950;gfx1100"
     if [[ $tag =~ "benchmarks" ]]; then
       INDUCTOR_BENCHMARKS=yes
     fi
     ;;
-  pytorch-linux-noble-rocm-alpha-py3)
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    VISION=yes
-    ROCM_VERSION=7.0
-    NINJA_VERSION=1.9.0
-    TRITON=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
-    ;;
   pytorch-linux-jammy-xpu-n-1-py3)
     ANACONDA_PYTHON_VERSION=3.10
     GCC_VERSION=11
@@ -371,7 +361,7 @@ docker build \
   --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
   --build-arg "KATEX=${KATEX:-}" \
   --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
-  --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
+  --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" \
   --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
   --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
   --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
@@ -389,6 +379,7 @@ docker build \
   --build-arg "OPENBLAS=${OPENBLAS:-}" \
   --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
   --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
+  --build-arg "INSTALL_MINGW=${INSTALL_MINGW:-}" \
   -f $(dirname ${DOCKERFILE})/Dockerfile \
   -t "$tmp_tag" \
   "$@" \
@@ -469,6 +460,7 @@ elif [ "$HAS_TRITON" = "yes" ]; then
   echo "expecting triton to not be installed, but it is"
   exit 0
 fi
+<<<<<<< HEAD
 
 # Sanity check cmake version. Executorch reinstalls cmake and I'm not sure if
 # they support 4.0.0 yet, so exclude them from this check.
@@ -478,3 +470,5 @@ if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then
   drun cmake --version
   exit 0
 fi
+=======
+>>>>>>> upstream/main
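Note that the last two hunks commit literal merge-conflict markers into build.sh, leaving the cmake sanity check inside an unresolved <<<<<<< HEAD block. A quick check like the following (illustrative) surfaces such leftovers:

    # flag unresolved conflict markers before committing
    git grep -nE '^(<{7} |={7}$|>{7} )' -- .ci/docker/build.sh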
(file name hidden by the commit view)

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-e0dda9059d082537cee36be6c5e4fe3b18c880c0
+deb42f2a8e48f5032b4a98ee781a15fa87a157cf
(file name hidden by the commit view)

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-v2.27.5-1
+v2.27.5-1

(the old and new lines render identically here; the actual difference is not visible in the captured text)
(file name hidden by the commit view)

Lines changed: 4 additions & 0 deletions
@@ -1 +1,5 @@
+<<<<<<< HEAD
 d704bc6e69c1a588c8edd3cbb67505d554ed65f6
+=======
+7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd
+>>>>>>> upstream/main

.ci/docker/common/install_acl.sh

Mode changed: 100644 → 100755
Lines changed: 19 additions & 8 deletions
@@ -1,16 +1,27 @@
-set -euo pipefail
+#!/bin/bash
+# Script used only in CD pipeline
 
-readonly version=v25.02
-readonly src_host=https://github.com/ARM-software
-readonly src_repo=ComputeLibrary
+set -eux
 
-# Clone ACL
-[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git
-cd ${src_repo}
+ACL_VERSION=${ACL_VERSION:-"v25.02"}
+ACL_INSTALL_DIR="/acl"
 
-git checkout $version
+# Clone ACL
+git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules
 
+ACL_CHECKOUT_DIR="ComputeLibrary"
 # Build with scons
+pushd $ACL_CHECKOUT_DIR
 scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \
   os=linux arch=armv8a build=native multi_isa=1 \
   fixed_format_kernels=1 openmp=1 cppthreads=0
+popd
+
+# Install ACL
+sudo mkdir -p ${ACL_INSTALL_DIR}
+for d in arm_compute include utils support src build
+do
+  sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d}
+done
+
+rm -rf $ACL_CHECKOUT_DIR
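With this rewrite the script is directly invocable from the wheel-build flow shown earlier. A usage sketch (the version override is optional, and the script uses sudo internally because it installs into /acl):

    # clone, build, and install Compute Library into /acl
    ACL_VERSION=v25.02 bash pytorch/.ci/docker/common/install_acl.sh
    export ACL_ROOT_DIR=/acl  # what the PyTorch build consumes afterwards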
