Skip to content

Commit

Permalink
Simplify CPUID code
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Jun 22, 2024
1 parent 0f517db commit 061cf27
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 97 deletions.
66 changes: 4 additions & 62 deletions include/primesieve/cpu_supports_avx512_vbmi2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,74 +11,16 @@
#ifndef CPU_SUPPORTS_AVX512_VBMI2_HPP
#define CPU_SUPPORTS_AVX512_VBMI2_HPP

#include "cpuid.hpp"
namespace primesieve {

#if defined(_MSC_VER)
#include <immintrin.h>
#endif

// %ebx bit flags
#define bit_AVX512F (1 << 16)

// %ecx bit flags
#define bit_AVX512VBMI (1 << 1)
#define bit_AVX512VBMI2 (1 << 6)
bool has_cpuid_avx512_vbmi2();

// xgetbv bit flags
#define XSTATE_SSE (1 << 1)
#define XSTATE_YMM (1 << 2)
#define XSTATE_ZMM (7 << 5)
} // namespace

namespace {

/// Returns the value of the extended control register
/// selected by index 0 (XCR0), narrowed to an int.
inline int get_xcr0()
{
#if defined(_MSC_VER)
  // MSVC: use the compiler intrinsic, the result is cast down to int.
  return (int) _xgetbv(0);
#else
  int eax_value;

  // Inline assembly: 0 is passed in %ecx, the result is read
  // from %eax and %edx is declared as clobbered.
  __asm__ ("xgetbv" : "=a" (eax_value) : "c" (0) : "%edx" );

  return eax_value;
#endif
}

/// Runtime check whether AVX512F, AVX512VBMI and AVX512VBMI2 can be used.
/// Returns true only if the OS has enabled the SSE, YMM and ZMM
/// states in XCR0 and CPUID reports all three instruction sets.
inline bool run_cpuid_avx512_vbmi2()
{
  int regs[4];
  run_cpuid(1, 0, regs);

  // Bit 27 of regs[2]: the OS must support extended processor
  // state management, otherwise bail out before get_xcr0() is called.
  const int osxsave_bit = 1 << 27;
  if ((regs[2] & osxsave_bit) == 0)
    return false;

  const int xcr0 = get_xcr0();

  // AVX OS support: both the SSE and YMM state bits must be set
  const int avx_state = XSTATE_SSE | XSTATE_YMM;
  if ((xcr0 & avx_state) != avx_state)
    return false;

  // AVX512 OS support: additionally all ZMM state bits must be set
  const int avx512_state = avx_state | XSTATE_ZMM;
  if ((xcr0 & avx512_state) != avx512_state)
    return false;

  run_cpuid(7, 0, regs);

  // PrimeGenerator::fillNextPrimes() requires AVX512F, AVX512VBMI & AVX512VBMI2.
  // regs[1] holds the %ebx bit flags, regs[2] holds the %ecx bit flags.
  const int vbmi_flags = bit_AVX512VBMI | bit_AVX512VBMI2;
  const bool has_avx512f = (regs[1] & bit_AVX512F) != 0;
  const bool has_vbmi_and_vbmi2 = (regs[2] & vbmi_flags) == vbmi_flags;

  return has_avx512f && has_vbmi_and_vbmi2;
}

/// Initialized at startup
bool cpu_supports_avx512_vbmi2 = run_cpuid_avx512_vbmi2();
bool cpu_supports_avx512_vbmi2 = primesieve::has_cpuid_avx512_vbmi2();

} // namespace

Expand Down
39 changes: 4 additions & 35 deletions include/primesieve/cpu_supports_popcnt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,17 @@
#ifndef CPU_SUPPORTS_POPCNT_HPP
#define CPU_SUPPORTS_POPCNT_HPP

// Enable CPUID on x86 and x86-64 CPUs
#if defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86)

// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
#define HAS_POPCNT
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
#define HAS_POPCNT
#endif
namespace primesieve {

#if !defined(HAS_POPCNT)
bool has_cpuid_popcnt();

#include "cpuid.hpp"
#define ENABLE_CPUID_POPCNT
} // namespace

namespace {

/// Runtime check whether the CPU reports the POPCNT bit flag via CPUID.
inline bool run_cpuid_supports_popcnt()
{
  int regs[4];
  run_cpuid(1, 0, regs);

  // regs[2] holds the %ecx flags, POPCNT is bit 23
  const int popcnt_mask = 1 << 23;
  return (regs[2] & popcnt_mask) != 0;
}

/// Initialized at startup
bool cpu_supports_popcnt = run_cpuid_supports_popcnt();
bool cpu_supports_popcnt = primesieve::has_cpuid_popcnt();

} // namespace

#endif // !defined(HAS_POPCNT)
#endif // CPUID

#endif

0 comments on commit 061cf27

Please sign in to comment.