Skip to content

Commit db9d457

Browse files
helloguo authored and pytorchmergebot committed
Use sleef on macOS Apple silicon by default (pytorch#126509)
Use sleef ~~for aarch64~~ on macOS Apple silicon by default.
Pull Request resolved: pytorch#126509.
Approved by: https://github.com/digantdesai, https://github.com/malfet
1 parent 2fc9079 commit db9d457

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

CMakeLists.txt

+8
Original file line number | Diff line number | Diff line change
@@ -892,6 +892,14 @@ endif()
892892

893893
if(USE_SLEEF_FOR_ARM_VEC256)
894894
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
895+
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
896+
endif()
897+
898+
# Enable sleef on macOS with Apple silicon by default
899+
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64"))
900+
message(STATUS "Running on macOS with Apple silicon")
901+
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
902+
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
895903
endif()
896904

897905
if(USE_XNNPACK)

torch/_inductor/codecache.py

+2
Original file line number | Diff line number | Diff line change
@@ -1327,6 +1327,8 @@ def __bool__(self) -> bool:
13271327
class VecNEON(VecISA):
13281328
_bit_width = 256 # This is required to leverage the compute implemented in aten/src/ATen/cpu/vec/vec256/vec256_float_neon.h
13291329
_macro = "-DCPU_CAPABILITY_NEON"
1330+
if sys.platform == "darwin" and platform.processor() == "arm":
1331+
_macro += " -DAT_BUILD_ARM_VEC256_WITH_SLEEF"
13301332
_arch_flags = "" # Unused
13311333
_dtype_nelements = {torch.float: 8, torch.bfloat16: 16, torch.float16: 16}
13321334

0 commit comments

Comments
 (0)