diff --git a/hwy/cache_control.h b/hwy/cache_control.h index b03341ba94..90743cd3f2 100644 --- a/hwy/cache_control.h +++ b/hwy/cache_control.h @@ -98,9 +98,10 @@ template HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) { (void)p; #ifndef HWY_DISABLE_CACHE_CONTROL -#if HWY_ARCH_X86 +// Use _mm_prefetch on x86/x64, except when clang-cl is compiled with -mno-mmx. +#if HWY_ARCH_X86 && !(HWY_COMPILER_CLANGCL && !defined(__MMX__)) _mm_prefetch(reinterpret_cast(p), _MM_HINT_T0); -#elif HWY_COMPILER_GCC // includes clang +#elif HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL // includes clang // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not // desirable, so use the default 3 (keep in caches). __builtin_prefetch(p, /*write=*/0, /*hint=*/3);