Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POPCNT runtime detection using CPUID for x86 CPUs #148

Merged
merged 16 commits into from
Apr 4, 2024
Merged
40 changes: 11 additions & 29 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ if(NOT isMultiConfig AND NOT CMAKE_BUILD_TYPE)
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Define the ENABLE_ASSERT preprocessor macro for Debug builds only.
#
# A generator expression is used instead of testing CMAKE_BUILD_TYPE at
# configure time, because CMAKE_BUILD_TYPE is empty under multi-config
# generators (Visual Studio, Xcode, Ninja Multi-Config). $<CONFIG:Debug>
# is evaluated per configuration at generate time, so it works for both
# single-config and multi-config builds.
#
# For every other configuration the expression expands to an empty
# string, which target_compile_definitions() ignores. ENABLE_ASSERT must
# therefore only be consumed by commands that evaluate generator
# expressions (e.g. target_compile_definitions), not by if().
set(ENABLE_ASSERT "$<$<CONFIG:Debug>:ENABLE_ASSERT>")

# primesieve binary source files #####################################

set(BIN_SRC src/app/CmdOptions.cpp
Expand Down Expand Up @@ -106,8 +110,10 @@ endif()
# Check if compiler supports x64 multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_popcnt_bmi.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")
if(multiarch_avx512_vbmi2)
set(MULTIARCH_AVX512 "MULTIARCH_AVX512")
endif()
endif()

# libprimesieve (shared library) #####################################
Expand All @@ -123,16 +129,7 @@ if(BUILD_SHARED_LIBS)
set_target_properties(libprimesieve PROPERTIES SOVERSION ${PRIMESIEVE_SOVERSION_MAJOR})
set_target_properties(libprimesieve PROPERTIES VERSION ${PRIMESIEVE_SOVERSION})
target_compile_options(libprimesieve PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})

if(multiarch_popcnt_bmi)
target_compile_definitions(libprimesieve PRIVATE "MULTIARCH_POPCNT_BMI")
endif()
if(multiarch_avx512)
target_compile_definitions(libprimesieve PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(libprimesieve PRIVATE "ENABLE_ASSERT")
endif()
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")

if(WIN32_MSVC_COMPATIBLE)
# On Windows the shared library will be named primesieve.dll
Expand Down Expand Up @@ -170,16 +167,7 @@ if(BUILD_STATIC_LIBS)
set_target_properties(libprimesieve-static PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${LIBATOMIC})
target_compile_options(libprimesieve-static PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})

if(multiarch_popcnt_bmi)
target_compile_definitions(libprimesieve-static PRIVATE "MULTIARCH_POPCNT_BMI")
endif()
if(multiarch_avx512)
target_compile_definitions(libprimesieve-static PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(libprimesieve-static PRIVATE "ENABLE_ASSERT")
endif()
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimesieve-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down Expand Up @@ -234,16 +222,10 @@ endif()
if(BUILD_PRIMESIEVE)
add_executable(primesieve ${BIN_SRC})
target_link_libraries(primesieve primesieve::primesieve Threads::Threads)
target_compile_definitions(primesieve PRIVATE "${ENABLE_ASSERT}")
target_compile_features(primesieve PRIVATE cxx_auto_type)
install(TARGETS primesieve DESTINATION ${CMAKE_INSTALL_BINDIR})

if(multiarch_avx512)
target_compile_definitions(primesieve PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(primesieve PRIVATE "ENABLE_ASSERT")
endif()

if(WITH_MSVC_CRT_STATIC)
set_target_properties(primesieve PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
endif()
Expand Down
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Changes in version 12.3, 04/04/2024
===================================

* Add runtime POPCNT detection using CPUID for x86 CPUs.
* Improve GCC/Clang multiarch preprocessor logic.
* CMakeLists.txt: Remove POPCNT/BMI check for x86 CPUs.

Changes in version 12.2, 30/03/2024
===================================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ check_cxx_source_compiles("
public:
__attribute__ ((target (\"default\")))
void fillNextPrimes(uint64_t* primes64);
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2,popcnt\")))
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void fillNextPrimes(uint64_t* primes64);
};
__attribute__ ((target (\"default\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
primes64[0] = 2;
}
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2,popcnt\")))
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
__m512i bytes_0_to_7 = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
Expand All @@ -36,4 +36,4 @@ check_cxx_source_compiles("
p.fillNextPrimes(primes);
return 0;
}
" multiarch_avx512)
" multiarch_avx512_vbmi2)
57 changes: 0 additions & 57 deletions cmake/multiarch_popcnt_bmi.cmake

This file was deleted.

94 changes: 94 additions & 0 deletions include/primesieve/CPUID.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
///
/// @file CPUID.hpp
/// @brief POPCNT detection for x86 and x86-64 CPUs.
///
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
///

#ifndef CPUID_HPP
#define CPUID_HPP

// Enable on x86 and x86-64 CPUs
#if defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86)

// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
#define HAS_POPCNT
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
#define HAS_POPCNT
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#endif

namespace {

/// Execute the x86 CPUID instruction and return the resulting
/// register values.
///
/// @param eax   Value loaded into EAX before CPUID is executed
///              (the CPUID leaf / function number).
/// @param ecx   Value loaded into ECX before CPUID is executed
///              (the CPUID sub-leaf).
/// @param abcd  Output array, must have room for at least 4 ints.
///              On return: abcd[0] = EAX, abcd[1] = EBX,
///              abcd[2] = ECX, abcd[3] = EDX.
///
inline void run_CPUID(int eax, int ecx, int* abcd)
{
#if defined(_MSC_VER)
// MSVC: use the compiler intrinsic from <intrin.h>,
// it writes the 4 result registers directly into abcd.
__cpuidex(abcd, eax, ecx);
#else
// GCC/Clang: use inline assembly. EBX and EDX are
// only outputs of CPUID, EAX and ECX are both inputs
// and outputs.
int ebx = 0;
int edx = 0;

#if defined(__i386__) && \
defined(__PIC__)
/* in case of PIC under 32-bit EBX cannot be clobbered */
// Hence EBX is first saved in EDI, then after CPUID the two
// registers are swapped: EDI receives the EBX result of CPUID
// (returned through the "=D" constraint) and EBX gets its
// original value back.
__asm__ ("movl %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=D" (ebx),
"+a" (eax),
"+c" (ecx),
"=d" (edx));
#else
// EBX may be used directly here.
__asm__ ("cpuid;"
: "+b" (ebx),
"+a" (eax),
"+c" (ecx),
"=d" (edx));
#endif

// Same output order as the MSVC branch: EAX, EBX, ECX, EDX
abcd[0] = eax;
abcd[1] = ebx;
abcd[2] = ecx;
abcd[3] = edx;
#endif
}

#if !defined(HAS_POPCNT)
#define ENABLE_CPUID_POPCNT

/// Query CPUID (EAX = 1, ECX = 0) and report whether bit 23 of the
/// returned ECX register, the POPCNT bit flag, is set.
///
/// @return true if the POPCNT bit is set, false otherwise.
///
inline bool run_CPUID_POPCNT()
{
  // run_CPUID() stores the registers as { EAX, EBX, ECX, EDX }
  int regs[4];
  run_CPUID(1, 0, regs);

  // %ecx POPCNT bit flag (bit 23)
  const int popcntMask = 1 << 23;
  const bool hasPopcnt = (regs[2] & popcntMask) != 0;

  return hasPopcnt;
}

/// Initialized at startup
const bool HAS_CPUID_POPCNT = run_CPUID_POPCNT();

#endif // ENABLE_CPUID_POPCNT

} // namespace

#endif // x86 CPU

#endif
34 changes: 21 additions & 13 deletions include/primesieve/PrimeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/// returns the primes. When there are no more primes left in
/// the vector PrimeGenerator generates new primes.
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -23,6 +23,21 @@
#include <stdint.h>
#include <cstddef>

#if defined(MULTIARCH_AVX512)
// GCC/Clang function multiversioning for AVX512 is not needed if
// the user compiles with -mavx512f -mavx512vbmi -mavx512vbmi2.
// GCC/Clang function multiversioning generally causes a minor
// overhead, hence we disable it if it is not needed.
#if defined(__AVX512__) || (defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__))
#undef MULTIARCH_AVX512
#else
#define MULTIARCH_TARGET_DEFAULT
#define MULTIARCH_TARGET_AVX512
#endif
#endif

namespace primesieve {

class PreSieve;
Expand All @@ -34,22 +49,15 @@ class PrimeGenerator : public Erat
void fillPrevPrimes(Vector<uint64_t>& primes, std::size_t* size);
static uint64_t maxCachedPrime();

#if defined(MULTIARCH_POPCNT_BMI)
#define MULTIARCH
__attribute__ ((target ("popcnt,bmi")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#if defined(MULTIARCH_TARGET_DEFAULT)
__attribute__ ((target ("default")))
#endif

#if defined(MULTIARCH_AVX512)
#define MULTIARCH
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2,popcnt")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#endif

#if defined(MULTIARCH)
__attribute__ ((target ("default")))
#endif
#if defined(MULTIARCH_TARGET_AVX512)
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#endif

private:
bool isInit_ = false;
Expand Down
Loading
Loading