Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move x86 CPUID code from cpuid.hpp to cpuid.cpp #150

Merged
merged 9 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.4...3.27)
project(primesieve CXX)
set(PRIMESIEVE_VERSION "12.3")
set(PRIMESIEVE_SOVERSION "12.3.0")
set(PRIMESIEVE_VERSION "12.4")
set(PRIMESIEVE_SOVERSION "12.4.0")

# Build options ######################################################

Expand Down Expand Up @@ -47,7 +47,7 @@ if(NOT isMultiConfig AND NOT CMAKE_BUILD_TYPE)
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(ENABLE_ASSERT "ENABLE_ASSERT")
list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_ASSERT")
endif()

# primesieve binary source files #####################################
Expand Down Expand Up @@ -82,6 +82,17 @@ set(LIB_SRC src/api-c.cpp
src/RiemannR.cpp
src/SievingPrimes.cpp)

# Check if compiler supports CPU multiarch ###########################

# Run the multiarch compile checks. Each included module stores the outcome
# of its check in the result variable tested below.
if(WITH_MULTIARCH)
  include("${PROJECT_SOURCE_DIR}/cmake/multiarch_x86_popcnt.cmake")
  include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")

  # src/x86/cpuid.cpp is only added to the library sources when at least
  # one of the two checks succeeded.
  if(multiarch_x86_popcnt OR multiarch_avx512_vbmi2)
    list(APPEND LIB_SRC src/x86/cpuid.cpp)
  endif()
endif()

# Required includes ##################################################

include(GNUInstallDirs)
Expand All @@ -107,26 +118,20 @@ if(WITH_AUTO_VECTORIZATION)
include("${PROJECT_SOURCE_DIR}/cmake/auto_vectorization.cmake")
endif()

# Check if compiler supports x64 multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")
endif()

# libprimesieve (shared library) #####################################

find_package(Threads REQUIRED QUIET)

if(BUILD_SHARED_LIBS)
add_library(libprimesieve SHARED ${LIB_SRC})
set_target_properties(libprimesieve PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve PRIVATE Threads::Threads ${LIBATOMIC})
target_link_libraries(libprimesieve PRIVATE Threads::Threads ${PRIMESIEVE_LINK_LIBRARIES})
string(REPLACE "." ";" SOVERSION_LIST ${PRIMESIEVE_SOVERSION})
list(GET SOVERSION_LIST 0 PRIMESIEVE_SOVERSION_MAJOR)
set_target_properties(libprimesieve PROPERTIES SOVERSION ${PRIMESIEVE_SOVERSION_MAJOR})
set_target_properties(libprimesieve PROPERTIES VERSION ${PRIMESIEVE_SOVERSION})
target_compile_options(libprimesieve PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")
target_compile_options(libprimesieve PRIVATE ${PRIMESIEVE_COMPILE_OPTIONS})
target_compile_definitions(libprimesieve PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})

if(WIN32_MSVC_COMPATIBLE)
# On Windows the shared library will be named primesieve.dll
Expand Down Expand Up @@ -162,9 +167,9 @@ endif()
if(BUILD_STATIC_LIBS)
add_library(libprimesieve-static STATIC ${LIB_SRC})
set_target_properties(libprimesieve-static PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${LIBATOMIC})
target_compile_options(libprimesieve-static PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${PRIMESIEVE_LINK_LIBRARIES})
target_compile_options(libprimesieve-static PRIVATE ${PRIMESIEVE_COMPILE_OPTIONS})
target_compile_definitions(libprimesieve-static PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimesieve-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down Expand Up @@ -219,7 +224,7 @@ endif()
if(BUILD_PRIMESIEVE)
add_executable(primesieve ${BIN_SRC})
target_link_libraries(primesieve primesieve::primesieve Threads::Threads)
target_compile_definitions(primesieve PRIVATE "${ENABLE_ASSERT}")
target_compile_definitions(primesieve PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})
target_compile_features(primesieve PRIVATE cxx_auto_type)
install(TARGETS primesieve DESTINATION ${CMAKE_INSTALL_BINDIR})

Expand Down
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
Changes in version 12.4, 22/06/2024
===================================

* Move x86 CPUID code from cpuid.hpp to src/x86/cpuid.cpp.
* multiarch_x86_popcnt.cmake: Detect x86 POPCNT support.
* CMakeLists.txt: Use CMake list for all compile time definitions.
* CMakeLists.txt: Use CMake list for all link libraries.

Changes in version 12.3, 15/04/2024
===================================

Expand Down
4 changes: 2 additions & 2 deletions cmake/auto_vectorization.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ check_cxx_compiler_flag(-ftree-vectorize ftree_vectorize)
cmake_pop_check_state()

if(ftree_vectorize)
set(FTREE_VECTORIZE_FLAG "-ftree-vectorize")
list(APPEND PRIMESIEVE_COMPILE_OPTIONS "-ftree-vectorize")

cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS -Werror)
check_cxx_compiler_flag(-fvect-cost-model=dynamic fvect_cost_model)
cmake_pop_check_state()

if(fvect_cost_model)
set(FVECT_COST_MODEL_FLAG "-fvect-cost-model=dynamic")
list(APPEND PRIMESIEVE_COMPILE_OPTIONS "-fvect-cost-model=dynamic")
endif()
endif()
5 changes: 4 additions & 1 deletion cmake/libatomic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ if(NOT atomic64)
}"
atomic64_with_libatomic)

if (NOT atomic64_with_libatomic)
if(atomic64_with_libatomic)
list(APPEND PRIMESIEVE_LINK_LIBRARIES "${LIBATOMIC}")
else()
set(LIBATOMIC "")
message(FATAL_ERROR "Failed to compile std::atomic, libatomic likely not found!")
endif()
endif()
Expand Down
14 changes: 7 additions & 7 deletions cmake/multiarch_avx512_vbmi2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}/include")
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// GCC/Clang function multiversioning for AVX512 is not needed if
Expand All @@ -20,19 +20,19 @@ check_cxx_source_compiles("
Error: AVX512VBMI2 multiarch not needed!
#endif

#include <primesieve/cpu_supports_avx512_vbmi2.hpp>
#include <src/x86/cpuid.cpp>
#include <immintrin.h>
#include <stdint.h>

class PrimeGenerator {
public:
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void fillNextPrimes_avx512(uint64_t* primes64);
void fillNextPrimes_avx512_vbmi2(uint64_t* primes64);
void fillNextPrimes_default(uint64_t* primes64);
void fillNextPrimes(uint64_t* primes64)
{
if (cpu_supports_avx512_vbmi2)
fillNextPrimes_avx512(primes64);
if (primesieve::has_cpuid_avx512_vbmi2())
fillNextPrimes_avx512_vbmi2(primes64);
else
fillNextPrimes_default(primes64);
}
Expand All @@ -44,7 +44,7 @@ check_cxx_source_compiles("
}

__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void PrimeGenerator::fillNextPrimes_avx512(uint64_t* primes64)
void PrimeGenerator::fillNextPrimes_avx512_vbmi2(uint64_t* primes64)
{
__m512i bytes_0_to_7 = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
__m512i base = _mm512_set1_epi64(123);
Expand All @@ -64,7 +64,7 @@ check_cxx_source_compiles("
" multiarch_avx512_vbmi2)

if(multiarch_avx512_vbmi2)
set(ENABLE_MULTIARCH_AVX512 "ENABLE_MULTIARCH_AVX512")
list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_AVX512_VBMI2")
endif()

cmake_pop_check_state()
53 changes: 53 additions & 0 deletions cmake/multiarch_x86_popcnt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Configure-time probe for x86 POPCNT multiarch support.
#
# On x86 CPUs the use of cpuid.cpp has to be enabled. The probe source
# below includes src/x86/cpuid.cpp directly: if it builds, the target is
# assumed to be an x86 CPU and ENABLE_MULTIARCH_x86_POPCNT is appended to
# PRIMESIEVE_COMPILE_DEFINITIONS. The probe deliberately fails to compile
# on non-x86 targets and when POPCNT is already enabled at compile time.

include(CMakePushCheckState)
include(CheckCXXSourceCompiles)

# Save the current CMAKE_REQUIRED_* settings so that the include path
# set here only affects this single check.
cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// Enable CPUID for POPCNT on x86 and x86-64 CPUs.
// This is required because not all x86 and x86-64 CPUs
// support the POPCNT instruction.
#if !(defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86))
Error: x86 POPCNT multiarch not needed!
#endif

// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
Error: x86 POPCNT multiarch not needed!

// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
Error: x86 POPCNT multiarch not needed!
#endif

#include <src/x86/cpuid.cpp>
#include <iostream>

int main()
{
if (primesieve::has_cpuid_popcnt())
std::cout << \"CPU supports POPCNT!\" << std::endl;
else
std::cout << \"CPU does not support POPCNT!\" << std::endl;

return 0;
}
" multiarch_x86_popcnt)

# Restore the CMAKE_REQUIRED_* settings saved above.
cmake_pop_check_state()

# Record the result as a compile definition for the primesieve targets.
if(multiarch_x86_popcnt)
  list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_x86_POPCNT")
endif()
4 changes: 2 additions & 2 deletions include/primesieve.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#ifndef PRIMESIEVE_H
#define PRIMESIEVE_H

#define PRIMESIEVE_VERSION "12.3"
#define PRIMESIEVE_VERSION "12.4"
#define PRIMESIEVE_VERSION_MAJOR 12
#define PRIMESIEVE_VERSION_MINOR 3
#define PRIMESIEVE_VERSION_MINOR 4

#include <primesieve/iterator.h>

Expand Down
4 changes: 2 additions & 2 deletions include/primesieve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#ifndef PRIMESIEVE_HPP
#define PRIMESIEVE_HPP

#define PRIMESIEVE_VERSION "12.3"
#define PRIMESIEVE_VERSION "12.4"
#define PRIMESIEVE_VERSION_MAJOR 12
#define PRIMESIEVE_VERSION_MINOR 3
#define PRIMESIEVE_VERSION_MINOR 4

#include <primesieve/iterator.hpp>
#include <primesieve/primesieve_error.hpp>
Expand Down
3 changes: 1 addition & 2 deletions include/primesieve/CpuInfo.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
///
/// @file CpuInfo.hpp
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -22,7 +22,6 @@ class CpuInfo
public:
CpuInfo();
bool hasCpuName() const;
bool hasAVX512() const;
bool hasLogicalCpuCores() const;
bool hasL1Cache() const;
bool hasL2Cache() const;
Expand Down
4 changes: 2 additions & 2 deletions include/primesieve/Erat.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
///
/// @file Erat.hpp
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -15,8 +15,8 @@
#include "EratMedium.hpp"
#include "EratBig.hpp"
#include "macros.hpp"
#include "intrinsics.hpp"
#include "Vector.hpp"
#include "ctz.hpp"

#include <stdint.h>

Expand Down
20 changes: 10 additions & 10 deletions include/primesieve/PrimeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__) && \
__has_include(<immintrin.h>)
#define ENABLE_AVX512
#define ENABLE_AVX512_VBMI2

#elif defined(ENABLE_MULTIARCH_AVX512) && \
#elif defined(ENABLE_MULTIARCH_AVX512_VBMI2) && \
__has_include(<immintrin.h>)
#include "cpu_supports_avx512_vbmi2.hpp"
#define ENABLE_DEFAULT
Expand All @@ -50,11 +50,11 @@ class PrimeGenerator : public Erat

ALWAYS_INLINE void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size)
{
#if defined(ENABLE_AVX512)
fillNextPrimes_avx512(primes, size);
#elif defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_AVX512_VBMI2)
fillNextPrimes_avx512_vbmi2(primes, size);
#elif defined(ENABLE_MULTIARCH_AVX512_VBMI2)
if (cpu_supports_avx512_vbmi2)
fillNextPrimes_avx512(primes, size);
fillNextPrimes_avx512_vbmi2(primes, size);
else
fillNextPrimes_default(primes, size);
#else
Expand All @@ -68,13 +68,13 @@ class PrimeGenerator : public Erat
void fillNextPrimes_default(Vector<uint64_t>& primes, std::size_t* size);
#endif

#if defined(ENABLE_AVX512) || \
defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_AVX512_VBMI2) || \
defined(ENABLE_MULTIARCH_AVX512_VBMI2)

#if defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_MULTIARCH_AVX512_VBMI2)
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
#endif
void fillNextPrimes_avx512(Vector<uint64_t>& primes, std::size_t* size);
void fillNextPrimes_avx512_vbmi2(Vector<uint64_t>& primes, std::size_t* size);

#endif

Expand Down
Loading