Skip to content

Commit

Permalink
Simplify preprocessor logic
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Apr 7, 2024
1 parent 55cdcce commit 2097a56
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 44 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ endif()
if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")
if(multiarch_avx512_vbmi2)
set(MULTIARCH_AVX512 "MULTIARCH_AVX512")
set(ENABLE_MULTIARCH_AVX512 "ENABLE_MULTIARCH_AVX512")
endif()
endif()

Expand All @@ -129,7 +129,7 @@ if(BUILD_SHARED_LIBS)
set_target_properties(libprimesieve PROPERTIES SOVERSION ${PRIMESIEVE_SOVERSION_MAJOR})
set_target_properties(libprimesieve PROPERTIES VERSION ${PRIMESIEVE_SOVERSION})
target_compile_options(libprimesieve PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")

if(WIN32_MSVC_COMPATIBLE)
# On Windows the shared library will be named primesieve.dll
Expand Down Expand Up @@ -167,7 +167,7 @@ if(BUILD_STATIC_LIBS)
set_target_properties(libprimesieve-static PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${LIBATOMIC})
target_compile_options(libprimesieve-static PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimesieve-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down
19 changes: 17 additions & 2 deletions cmake/multiarch_avx512_vbmi2.cmake
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
include(CheckCXXSourceCompiles)

# We use GCC/Clang's function multiversioning for AVX512
# support. This code will automatically dispatch to the
# AVX512 algorithm if the CPU supports AVX512F, AVX512VBMI
# and AVX512VBMI2, and use the default (portable) algorithm
# otherwise.

include(CheckCXXSourceCompiles)

check_cxx_source_compiles("
#include <immintrin.h>
#include <stdint.h>
// GCC/Clang function multiversioning for AVX512 is not needed if
// the user compiles with -mavx512f -mavx512vbmi -mavx512vbmi2.
// GCC/Clang function multiversioning generally causes a minor
// overhead, hence we disable it if it is not needed.
#if defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__)
Error: AVX512VBMI2 multiarch not needed!
#endif
class PrimeGenerator {
public:
__attribute__ ((target (\"default\")))
void fillNextPrimes(uint64_t* primes64);
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void fillNextPrimes(uint64_t* primes64);
};
__attribute__ ((target (\"default\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
primes64[0] = 2;
}
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
Expand All @@ -29,6 +43,7 @@ check_cxx_source_compiles("
vprimes = _mm512_add_epi64(base, vprimes);
_mm512_storeu_si512(primes64, vprimes);
}
int main()
{
uint64_t primes[8];
Expand Down
28 changes: 7 additions & 21 deletions include/primesieve/PrimeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,6 @@
#include <stdint.h>
#include <cstddef>

#if defined(MULTIARCH_AVX512)
// GCC/Clang function multiversioning for AVX512 is not needed if
// the user compiles with -mavx512f -mavx512vbmi -mavx512vbmi2.
// GCC/Clang function multiversioning generally causes a minor
// overhead, hence we disable it if it is not needed.
#if defined(__AVX512__) || (defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__))
#undef MULTIARCH_AVX512
#else
#define MULTIARCH_TARGET_DEFAULT
#define MULTIARCH_TARGET_AVX512
#endif
#endif

namespace primesieve {

class PreSieve;
Expand All @@ -49,16 +34,17 @@ class PrimeGenerator : public Erat
void fillPrevPrimes(Vector<uint64_t>& primes, std::size_t* size);
static uint64_t maxCachedPrime();

#if defined(MULTIARCH_TARGET_DEFAULT)
__attribute__ ((target ("default")))
#endif
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);

#if defined(MULTIARCH_TARGET_AVX512)
#if defined(ENABLE_MULTIARCH_AVX512)
#define ENABLE_MULTIARCH_DEFAULT
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#endif

#if defined(ENABLE_MULTIARCH_DEFAULT)
__attribute__ ((target ("default")))
#endif
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);

private:
bool isInit_ = false;
uint64_t low_ = 0;
Expand Down
38 changes: 21 additions & 17 deletions src/PrimeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,25 @@
#include <algorithm>
#include <limits>

// x86-64 AVX512
#if __has_include(<immintrin.h>) && \
(defined(__AVX512__) || (defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__)))
#if defined(ENABLE_MULTIARCH_AVX512) && \
__has_include(<immintrin.h>)
#include <immintrin.h>
#define HAS_AVX512_VBMI2

// GCC/Clang function multiversioning
#elif defined(MULTIARCH_TARGET_AVX512) && \
__has_include(<immintrin.h>)
#elif defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__) && \
__has_include(<immintrin.h>)
#include <immintrin.h>
#define ENABLE_AVX512

#elif defined(_MSC_VER) && \
defined(__AVX512__) && \
__has_include(<immintrin.h>)
#include <immintrin.h>
#define ENABLE_AVX512

#else // Default portable algorithm
#define DEFAULT_CPU_ARCH
#else
#define ENABLE_DEFAULT
#endif

namespace {
Expand Down Expand Up @@ -406,8 +410,8 @@ void PrimeGenerator::fillPrevPrimes(Vector<uint64_t>& primes,
}
}

#if defined(DEFAULT_CPU_ARCH) || \
defined(MULTIARCH_TARGET_DEFAULT)
#if defined(ENABLE_DEFAULT) || \
defined(ENABLE_MULTIARCH_DEFAULT)

/// This method is used by iterator::next_prime().
/// This method stores only the next few primes (~ 1000) in the
Expand All @@ -416,7 +420,7 @@ void PrimeGenerator::fillPrevPrimes(Vector<uint64_t>& primes,
/// this reason iterator::next_prime() runs up to 2x faster
/// than iterator::prev_prime().
///
#if defined(MULTIARCH_TARGET_DEFAULT)
#if defined(ENABLE_MULTIARCH_DEFAULT)
__attribute__ ((target ("default")))
#endif
void PrimeGenerator::fillNextPrimes(Vector<uint64_t>& primes,
Expand Down Expand Up @@ -475,8 +479,8 @@ void PrimeGenerator::fillNextPrimes(Vector<uint64_t>& primes,

#endif

#if defined(HAS_AVX512_VBMI2) || \
defined(MULTIARCH_TARGET_AVX512)
#if defined(ENABLE_AVX512) || \
defined(ENABLE_MULTIARCH_AVX512)

/// This algorithm converts 1 bits from the sieve array into primes
/// using AVX512. The algorithm is a modified version of the AVX512
Expand All @@ -491,7 +495,7 @@ void PrimeGenerator::fillNextPrimes(Vector<uint64_t>& primes,
/// benchmarks this algorithm ran about 10% faster than the default
/// fillNextPrimes() algorithm which uses __builtin_ctzll().
///
#if defined(MULTIARCH_TARGET_AVX512)
#if defined(ENABLE_MULTIARCH_AVX512)
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
#endif
void PrimeGenerator::fillNextPrimes(Vector<uint64_t>& primes,
Expand Down
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ foreach(file ${files})
get_filename_component(binary_name ${file} NAME_WE)
add_executable(${binary_name} ${file})
target_link_libraries(${binary_name} primesieve::primesieve)
target_compile_definitions(${binary_name} PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")
target_compile_definitions(${binary_name} PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")
add_test(NAME ${binary_name} COMMAND ${binary_name})
endforeach()

0 comments on commit 2097a56

Please sign in to comment.