diff --git a/include/primesieve/cpu_supports_avx512_vbmi2.hpp b/include/primesieve/cpu_supports_avx512_vbmi2.hpp index 5ab575eb..0e59e47c 100644 --- a/include/primesieve/cpu_supports_avx512_vbmi2.hpp +++ b/include/primesieve/cpu_supports_avx512_vbmi2.hpp @@ -11,74 +11,16 @@ #ifndef CPU_SUPPORTS_AVX512_VBMI2_HPP #define CPU_SUPPORTS_AVX512_VBMI2_HPP -#include "cpuid.hpp" +namespace primesieve { -#if defined(_MSC_VER) - #include <immintrin.h> -#endif - -// %ebx bit flags -#define bit_AVX512F (1 << 16) - -// %ecx bit flags -#define bit_AVX512VBMI (1 << 1) -#define bit_AVX512VBMI2 (1 << 6) +bool has_cpuid_avx512_vbmi2(); -// xgetbv bit flags -#define XSTATE_SSE (1 << 1) -#define XSTATE_YMM (1 << 2) -#define XSTATE_ZMM (7 << 5) +} // namespace namespace { -// Get Value of Extended Control Register -inline int get_xcr0() -{ - int xcr0; - -#if defined(_MSC_VER) - xcr0 = (int) _xgetbv(0); -#else - __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); -#endif - - return xcr0; -} - -inline bool run_cpuid_avx512_vbmi2() -{ - int abcd[4]; - - run_cpuid(1, 0, abcd); - - int osxsave_mask = (1 << 27); - - // Ensure OS supports extended processor state management - if ((abcd[2] & osxsave_mask) != osxsave_mask) - return false; - - int ymm_mask = XSTATE_SSE | XSTATE_YMM; - int zmm_mask = XSTATE_SSE | XSTATE_YMM | XSTATE_ZMM; - - int xcr0 = get_xcr0(); - - // Check AVX OS support - if ((xcr0 & ymm_mask) != ymm_mask) - return false; - - // Check AVX512 OS support - if ((xcr0 & zmm_mask) != zmm_mask) - return false; - - run_cpuid(7, 0, abcd); - - // PrimeGenerator::fillNextPrimes() requires AVX512F, AVX512VBMI & AVX512VBMI2 - return ((abcd[1] & bit_AVX512F) == bit_AVX512F && - (abcd[2] & (bit_AVX512VBMI | bit_AVX512VBMI2)) == (bit_AVX512VBMI | bit_AVX512VBMI2)); -} - /// Initialized at startup -bool cpu_supports_avx512_vbmi2 = run_cpuid_avx512_vbmi2(); +bool cpu_supports_avx512_vbmi2 = primesieve::has_cpuid_avx512_vbmi2(); } // namespace diff --git a/include/primesieve/cpu_supports_popcnt.hpp 
b/include/primesieve/cpu_supports_popcnt.hpp index 212c0f3b..ded65c3e 100644 --- a/include/primesieve/cpu_supports_popcnt.hpp +++ b/include/primesieve/cpu_supports_popcnt.hpp @@ -11,48 +11,17 @@ #ifndef CPU_SUPPORTS_POPCNT_HPP #define CPU_SUPPORTS_POPCNT_HPP -// Enable CPUID on x86 and x86-64 CPUs -#if defined(__x86_64__) || \ - defined(__i386__) || \ - defined(_M_X64) || \ - defined(_M_IX86) - -// Both GCC and Clang (even Clang on Windows) define the __POPCNT__ -// macro if the user compiles with -mpopcnt. The __POPCNT__ -// macro is even defined if the user compiles with other flags -// such as -mavx or -march=native. -#if defined(__POPCNT__) - #define HAS_POPCNT -// The MSVC compiler does not support a POPCNT macro, but if the user -// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines -// the __AVX__ macro and POPCNT is also supported. -#elif defined(_MSC_VER) && defined(__AVX__) - #define HAS_POPCNT -#endif +namespace primesieve { -#if !defined(HAS_POPCNT) +bool has_cpuid_popcnt(); -#include "cpuid.hpp" -#define ENABLE_CPUID_POPCNT +} // namespace namespace { -inline bool run_cpuid_supports_popcnt() -{ - int abcd[4]; - run_cpuid(1, 0, abcd); - - // %ecx POPCNT bit flag - int bit_POPCNT = 1 << 23; - return (abcd[2] & bit_POPCNT) == bit_POPCNT; -} - /// Initialized at startup -bool cpu_supports_popcnt = run_cpuid_supports_popcnt(); +bool cpu_supports_popcnt = primesieve::has_cpuid_popcnt(); } // namespace -#endif // !defined(HAS_POPCNT) -#endif // CPUID - #endif