diff --git a/build/make/Makefile b/build/make/Makefile index c2dc47ccff6..199ed78058a 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -148,6 +148,8 @@ $(BUILD_PFX)%_neon_dotprod.c.d: CFLAGS += -march=armv8.2-a+dotprod $(BUILD_PFX)%_neon_dotprod.c.o: CFLAGS += -march=armv8.2-a+dotprod $(BUILD_PFX)%_neon_i8mm.c.d: CFLAGS += -march=armv8.2-a+dotprod+i8mm $(BUILD_PFX)%_neon_i8mm.c.o: CFLAGS += -march=armv8.2-a+dotprod+i8mm +$(BUILD_PFX)%_sve.c.d: CFLAGS += -march=armv8.2-a+dotprod+i8mm+sve +$(BUILD_PFX)%_sve.c.o: CFLAGS += -march=armv8.2-a+dotprod+i8mm+sve # POWER $(BUILD_PFX)%_vsx.c.d: CFLAGS += -maltivec -mvsx diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl index 1a6b93d5ae2..0b9e16738ec 100755 --- a/build/make/rtcd.pl +++ b/build/make/rtcd.pl @@ -487,7 +487,7 @@ () @ALL_ARCHS = filter(qw/neon_asm neon/); arm; } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) { - @ALL_ARCHS = filter(qw/neon neon_dotprod neon_i8mm/); + @ALL_ARCHS = filter(qw/neon neon_dotprod neon_i8mm sve/); @REQUIRES = filter(qw/neon/); &require(@REQUIRES); arm; diff --git a/configure b/configure index 2c638e5e5a9..434c43792c4 100755 --- a/configure +++ b/configure @@ -257,6 +257,7 @@ ARCH_EXT_LIST_AARCH64=" neon neon_dotprod neon_i8mm + sve " ARCH_EXT_LIST_X86=" diff --git a/test/init_vpx_test.cc b/test/init_vpx_test.cc index e88c54f323b..f66f00b5c14 100644 --- a/test/init_vpx_test.cc +++ b/test/init_vpx_test.cc @@ -54,6 +54,9 @@ void init_vpx_test() { if (!(caps & HAS_NEON_I8MM)) { append_negative_gtest_filter(":NEON_I8MM.*:NEON_I8MM/*"); } + if (!(caps & HAS_SVE)) { + append_negative_gtest_filter(":SVE.*:SVE/*"); + } #elif VPX_ARCH_ARM const int caps = arm_cpu_caps(); if (!(caps & HAS_NEON)) append_negative_gtest_filter(":NEON.*:NEON/*"); diff --git a/vpx_ports/aarch64_cpudetect.c b/vpx_ports/aarch64_cpudetect.c index ac68f444526..f56d5888bac 100644 --- a/vpx_ports/aarch64_cpudetect.c +++ b/vpx_ports/aarch64_cpudetect.c @@ -77,7 +77,7 @@ static int arm_get_cpu_caps(void) { } #endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) #endif // HAVE_NEON_DOTPROD - // No I8MM feature detection available on Windows at time of writing. + // No I8MM or SVE feature detection available on Windows at time of writing. return flags; } @@ -98,6 +98,7 @@ static int arm_get_cpu_caps(void) { // Define hwcap values ourselves: building with an old auxv header where these // hwcap values are not defined should not prevent features from being enabled. #define VPX_AARCH64_HWCAP_ASIMDDP (1 << 20) +#define VPX_AARCH64_HWCAP_SVE (1 << 22) #define VPX_AARCH64_HWCAP2_I8MM (1 << 13) static int arm_get_cpu_caps(void) { @@ -117,6 +118,11 @@ static int arm_get_cpu_caps(void) { flags |= HAS_NEON_I8MM; } #endif // HAVE_NEON_I8MM +#if HAVE_SVE + if (hwcap & VPX_AARCH64_HWCAP_SVE) { + flags |= HAS_SVE; + } +#endif // HAVE_SVE return flags; } @@ -129,6 +135,10 @@ static int arm_get_cpu_caps(void) { #ifndef ZX_ARM64_FEATURE_ISA_I8MM #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19)) #endif +// Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083. +#ifndef ZX_ARM64_FEATURE_ISA_SVE +#define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20)) +#endif static int arm_get_cpu_caps(void) { int flags = 0; @@ -150,6 +160,11 @@ static int arm_get_cpu_caps(void) { flags |= HAS_NEON_I8MM; } #endif // HAVE_NEON_I8MM +#if HAVE_SVE + if (features & ZX_ARM64_FEATURE_ISA_SVE) { + flags |= HAS_SVE; + } +#endif // HAVE_SVE return flags; } @@ -170,5 +185,13 @@ int arm_cpu_caps(void) { flags &= ~HAS_NEON_I8MM; } + // Restrict flags: FEAT_SVE assumes that FEAT_{DotProd,I8MM} are available. + if (!(flags & HAS_NEON_DOTPROD)) { + flags &= ~HAS_SVE; + } + if (!(flags & HAS_NEON_I8MM)) { + flags &= ~HAS_SVE; + } + return flags; } diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h index 65909d82605..39365d18eec 100644 --- a/vpx_ports/arm.h +++ b/vpx_ports/arm.h @@ -23,6 +23,8 @@ extern "C" { #define HAS_NEON_DOTPROD (1 << 1) // Armv8.2-A optional Neon i8mm instructions, mandatory from Armv8.6-A. #define HAS_NEON_I8MM (1 << 2) +// Armv8.2-A optional SVE instructions, mandatory from Armv9.0-A. +#define HAS_SVE (1 << 3) int arm_cpu_caps(void);