Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
169 changes: 107 additions & 62 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC")
message(FATAL_ERROR "You cannot build ASSET with the Microsoft Compiler. Please use Clang or GCC and try again.")
endif()

if(NOT WIN32)
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld")
endif()
endif()


################################################################################
############### Build Settings #################################################
################################################################################
Expand Down Expand Up @@ -57,27 +50,40 @@ list(APPEND RELEASE_FLAGS "-O2")

## Generic Binary Flags
if(BUILD_ASSET_WHEEL)
#list(APPEND RELEASE_FLAGS "-mcx16;-mpopcnt;-msse3;-msse4.1;-msse4.2;-mssse3;-mavx;-mavx2;-mbmi;-mbmi2;-mf16c;-mfma;-mlzcnt;-mmovbe;-mxsave")
# x86-64-v3 flags written out for older compilers
list(APPEND RELEASE_FLAGS "-mcx16")
list(APPEND RELEASE_FLAGS "-mpopcnt")
list(APPEND RELEASE_FLAGS "-msse3")
list(APPEND RELEASE_FLAGS "-msse4.1")
list(APPEND RELEASE_FLAGS "-msse4.2")
list(APPEND RELEASE_FLAGS "-mssse3")
list(APPEND RELEASE_FLAGS "-mavx")
list(APPEND RELEASE_FLAGS "-mavx2")
list(APPEND RELEASE_FLAGS "-mbmi")
list(APPEND RELEASE_FLAGS "-mbmi2")
list(APPEND RELEASE_FLAGS "-mf16c")
list(APPEND RELEASE_FLAGS "-mfma")
list(APPEND RELEASE_FLAGS "-mlzcnt")
list(APPEND RELEASE_FLAGS "-mmovbe")
list(APPEND RELEASE_FLAGS "-mxsave")
# Detect architecture for wheel builds
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|ARM64")
# ARM64 (Apple Silicon, ARM servers) - use NEON
list(APPEND RELEASE_FLAGS "-march=armv8-a+simd")
message(STATUS "Building ARM64 wheel with NEON support")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
# x86-64 (Intel/AMD) - use AVX2 (x86-64-v3 flags written out for older compilers)
list(APPEND RELEASE_FLAGS "-mcx16")
list(APPEND RELEASE_FLAGS "-mpopcnt")
list(APPEND RELEASE_FLAGS "-msse3")
list(APPEND RELEASE_FLAGS "-msse4.1")
list(APPEND RELEASE_FLAGS "-msse4.2")
list(APPEND RELEASE_FLAGS "-mssse3")
list(APPEND RELEASE_FLAGS "-mavx")
list(APPEND RELEASE_FLAGS "-mavx2")
list(APPEND RELEASE_FLAGS "-mbmi")
list(APPEND RELEASE_FLAGS "-mbmi2")
list(APPEND RELEASE_FLAGS "-mf16c")
list(APPEND RELEASE_FLAGS "-mfma")
list(APPEND RELEASE_FLAGS "-mlzcnt")
list(APPEND RELEASE_FLAGS "-mmovbe")
list(APPEND RELEASE_FLAGS "-mxsave")
message(STATUS "Building x86-64 wheel with AVX2 support")
else()
# Fallback for other architectures
list(APPEND RELEASE_FLAGS "-march=native")
message(WARNING "Unknown architecture ${CMAKE_SYSTEM_PROCESSOR}, using -march=native")
endif()

# Windows runs out of ram with LINK_TIME_OPT on gh-actions
if(NOT WIN32)
list(APPEND RELEASE_FLAGS "-mtune=skylake")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
list(APPEND RELEASE_FLAGS "-mtune=skylake")
endif()
endif()
set(LINK_TIME_OPT TRUE) ### DO LTO - Recommended for full release
set(CLANG_MAX_INLINE_DEPTH 400)
Expand Down Expand Up @@ -154,18 +160,14 @@ endif()
if(NOT WIN32)
list(APPEND RELEASE_FLAGS "-fomit-frame-pointer")
list(APPEND RELEASE_FLAGS "-fno-stack-protector")
#list(APPEND RELEASE_FLAGS "-fno-stack-clash-protection")
#list(APPEND RELEASE_FLAGS "-fcf-protection=none")
list(APPEND RELEASE_FLAGS "-fno-asynchronous-unwind-tables")
list(APPEND RELEASE_FLAGS "-ffast-math")
if (NOT APPLE)
#list(APPEND RELEASE_FLAGS "-fno-stack-clash-protection")
endif()
endif()

if(APPLE)
list(APPEND COMMON_FLAGS "-Xlinker -undefined")
list(APPEND COMMON_FLAGS "-Xlinker dynamic_lookup")
endif()



# Combine Flags
set(COMPILE_FLAGS PUBLIC ${COMMON_FLAGS})
Expand Down Expand Up @@ -210,17 +212,20 @@ endif()
###################### Find and set our dependencies ###########################
################################################################################

if(APPLE)
include(FindPythonEnv)
endif()
#if(APPLE)
# include(FindPythonEnv)
#endif()

# Set up submodule dependencies
include_directories(dep/eigen)
include_directories(dep/fmt/include)
include_directories(dep/autodiff)

add_subdirectory(dep)
find_package(Python ${PYVERSION_EXACT} COMPONENTS Interpreter Development REQUIRED)

set(PYBIND11_CPP_STANDARD -std=c++17)
set(PYBIND11_FINDPYTHON ON)
add_subdirectory(dep)

# Set up external dependencies
find_package(Threads REQUIRED)
Expand All @@ -233,38 +238,70 @@ if(UNIX AND NOT APPLE)
list(APPEND COMPILE_FLAGS "-fopenmp")
endif()
elseif(APPLE)
if(FALSE)
include_directories("/usr/local/opt/llvm/include")
link_directories("/usr/local/opt/llvm/lib")
set(CMAKE_LIBRARY_PATH /usr/local/opt/llvm/lib ${CMAKE_LIBRARY_PATH})
set(CMAKE_LIBRARY_PATH $ENV{MKLROOT}/lib ${CMAKE_LIBRARY_PATH})
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(OpenMP_C "${CMAKE_C_COMPILER}")
set(OpenMP_C_FLAGS "-fopenmp=libomp")
set(OpenMP_C_LIB_NAMES "libomp")
set(OpenMP_libomp_LIBRARY "omp")
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
set(OpenMP_CXX_FLAGS "-fopenmp=libomp")
set(OpenMP_CXX_LIB_NAMES "libomp")
set(OpenMP_libomp_LIBRARY "omp")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
  # find_package(OpenMP REQUIRED) fails with AppleClang, so Homebrew's libomp
  # is located by hand. The version-independent "opt" prefixes are searched
  # first (Apple Silicon, then Intel Homebrew); the originally hard-coded
  # Cellar directory is kept as a last-resort hint for backward compatibility.
  # Both results are cache entries, so they can be overridden with
  # -DASSET_LIBOMP_INCLUDE_DIR=... / -DASSET_LIBOMP_LIBRARY=...
  find_path(ASSET_LIBOMP_INCLUDE_DIR
    NAMES omp.h
    HINTS
      "/opt/homebrew/opt/libomp/include"
      "/usr/local/opt/libomp/include"
      "/opt/homebrew/Cellar/libomp/20.1.8/include")
  find_library(ASSET_LIBOMP_LIBRARY
    NAMES omp
    HINTS
      "/opt/homebrew/opt/libomp/lib"
      "/usr/local/opt/libomp/lib"
      "/opt/homebrew/Cellar/libomp/20.1.8/lib")
  if(NOT ASSET_LIBOMP_INCLUDE_DIR OR NOT ASSET_LIBOMP_LIBRARY)
    message(FATAL_ERROR
      "Could not find libomp for AppleClang. Install it (e.g. 'brew install libomp') "
      "or set ASSET_LIBOMP_INCLUDE_DIR and ASSET_LIBOMP_LIBRARY.")
  endif()
  # link_directories() needs the folder that holds the library, not the file.
  get_filename_component(ASSET_LIBOMP_LIBRARY_DIR "${ASSET_LIBOMP_LIBRARY}" DIRECTORY)
  include_directories("${ASSET_LIBOMP_INCLUDE_DIR}")
  link_directories("${ASSET_LIBOMP_LIBRARY_DIR}")
  # "-Xclang -fopenmp" hands the OpenMP switch straight to the clang frontend.
  list(APPEND COMPILE_FLAGS "-Xclang")
  list(APPEND COMPILE_FLAGS "-fopenmp")
  list(APPEND OpenMP_CXX_FLAGS "-lomp")
else()
  # With the Homebrew LLVM clang compiler on Mac, the stock OpenMP find module
  # seems to work well for locating Homebrew's OpenMP.
  find_package(OpenMP REQUIRED)
  list(APPEND COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
endif()


list(APPEND COMPILE_FLAGS "-fopenmp")
list(APPEND OpenMP_CXX_FLAGS "-lomp")

# Add flags to make LLVM Clang more compatible with Apple SDK headers
list(APPEND COMPILE_FLAGS "-Wno-elaborated-enum-base")
endif()
endif()

endif()

find_package(MKL REQUIRED)
if(APPLE)
find_package(AccelerateSparse REQUIRED)
add_compile_definitions(USE_ACCELERATE_SPARSE)
else()
find_package(MKL REQUIRED)
endif()

find_package(Python ${PYVERSION_EXACT} REQUIRED COMPONENTS Interpreter Development)

# Set dependency variables
set(INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS} ${MKL_INCLUDE_DIRS})
set(LINK_LIBS ${PYTHON_LIBRARIES} ${MKL_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS})
set(INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR} ${MKL_INCLUDE_DIRS} ${AccelerateSparse_INCLUDE_DIRS})
set(LINK_LIBS ${MKL_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS} ${AccelerateSparse_LIBRARIES})

# Handle OpenMP for Apple
if (APPLE)
if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
list(APPEND LINK_LIBS ${OpenMP_CXX_FLAGS})
else()
list(APPEND LINK_LIBS OpenMP::OpenMP_CXX)
endif()
endif()

# Handle Python executable variable
if (NOT DEFINED PYTHON_EXECUTABLE)
set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
endif()
if(DEFINED Python_INCLUDE_DIRS)
list(APPEND INCLUDE_DIRS ${Python_INCLUDE_DIRS})
elseif(DEFINED PYTHON_INCLUDE_DIRS)
list(APPEND INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS})
endif()
if (NOT APPLE)
# Don't directly link with python library on mac and instead rely on -undefined dynamic_lookup
# to bind to the python symbols at runtime from the interpreter
if(DEFINED Python_LIBRARIES)
list(APPEND LINK_LIBS ${Python_LIBRARIES})
elseif(DEFINED PYTHON_LIBRARIES)
list(APPEND LINK_LIBS ${PYTHON_LIBRARIES})
endif()
endif()

if(UNIX)
list(APPEND LINK_LIBS m)
endif()
Expand All @@ -273,13 +310,22 @@ endif()

################################################################################
################ Linker Flags ##################################################

# Set linker for Clang after threading detection is complete
if(NOT WIN32)
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld")
endif()
endif()

if(BUILD_SHARED_LIBS AND NOT APPLE AND NOT WIN32) # Windows and Linux Dynamic Linking
# No space may follow "-Wl,": otherwise --end-group is handed to the compiler
# driver as a separate argument instead of being forwarded to the linker.
list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Wl,--no-undefined -Wl,--start-group ${MKL_LIBRARIES_LIST} -Wl,--end-group")
elseif(UNIX AND NOT APPLE) # Linux Static Linking
list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Wl,--no-undefined -Wl,--start-group ${MKL_LIBRARIES_LIST} -Wl,--end-group")
elseif(APPLE) # Apple Dynamic and Static Linking

list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Xlinker -undefined -Xlinker dynamic_lookup")
list(APPEND LINKER_FLAGS ${OpenMP_CXX_FLAGS})
list(APPEND LINKER_FLAGS "-Wl,-bind_at_load")
list(APPEND LINKER_FLAGS "-Wl,-undefined,dynamic_lookup")
else() # Windows Static Linking
list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS}")
endif()
Expand Down Expand Up @@ -328,4 +374,3 @@ add_subdirectory(pypiwheel)

# Formatting
include(cmake/clang-format.cmake)

143 changes: 143 additions & 0 deletions cmake/FindAccelerateSparse.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Ceres Solver - A fast non-linear least squares minimizer
# Copyright 2023 Google Inc. All rights reserved.
# http://ceres-solver.org/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Google Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: [email protected] (Alex Stewart)
#
# FindAccelerateSparse.cmake - Find the sparse solvers in Apple's Accelerate
# framework, introduced in Xcode 9.0 (2017).
# Note that this is distinct from the Accelerate
# framework on its own, which existed in previous
# versions but without the sparse solvers.
#
# This module defines the following variables which should be referenced
# by the caller to use the library.
#
# AccelerateSparse_FOUND: TRUE iff an Accelerate framework including the sparse
# solvers, and all dependencies, has been found.
# AccelerateSparse_INCLUDE_DIRS: Include directories for Accelerate framework.
# AccelerateSparse_LIBRARIES: Libraries for Accelerate framework and all
# dependencies.
#
# The following variables are also defined by this module, but in line with
# CMake recommended FindPackage() module style should NOT be referenced directly
# by callers (use the plural variables detailed above instead). These variables
# do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which
# are NOT re-called (i.e. search for library is not repeated) if these variables
# are set with valid values _in the CMake cache_. This means that if these
# variables are set directly in the cache, either by the user in the CMake GUI,
# or by the user passing -DVAR=VALUE directives to CMake when called (which
# explicitly defines a cache variable), then they will be used verbatim,
# bypassing the HINTS variables and other hard-coded search locations.
#
# AccelerateSparse_INCLUDE_DIR: Include directory for Accelerate framework, not
# including the include directory of any
# dependencies.
# AccelerateSparse_LIBRARY: Accelerate framework, not including the libraries of
# any dependencies.
# Failure handler for this find module, invoked when the Accelerate framework
# with the sparse solvers could not be located.
#
#   REASON_MSG  first fragment of the explanation appended to the message
#   ARGN        optional further fragments, appended after REASON_MSG
#
# Effects: reveals the search cache entries in the CMake GUI, clears every
# public result variable, prints the message at a severity derived from the
# find_package() QUIET / REQUIRED options, then calls return(). Because this is
# a macro, that return() leaves the calling file rather than just this helper.
macro(accelerate_sparse_report_not_found REASON_MSG)
  # Show the raw search results in the non-advanced GUI view so the user can
  # correct them without toggling to the advanced view.
  mark_as_advanced(CLEAR AccelerateSparse_INCLUDE_DIR AccelerateSparse_LIBRARY)

  # Nothing was found, so none of the public outputs may stay defined.
  unset(AccelerateSparse_LIBRARIES)
  unset(AccelerateSparse_INCLUDE_DIRS)
  unset(AccelerateSparse_FOUND)

  # The <package>_FIND_[REQUIRED/QUIETLY] variables set by find_package() use
  # the camel-case package name, not an upper-cased one.
  # NOTE: QUIET is tested first, so it takes precedence over REQUIRED here.
  if (AccelerateSparse_FIND_QUIETLY)
    message(STATUS
      "Failed to find Accelerate framework with sparse solvers - "
      ${REASON_MSG} ${ARGN})
  else()
    if (AccelerateSparse_FIND_REQUIRED)
      message(FATAL_ERROR
        "Failed to find Accelerate framework with sparse solvers - "
        ${REASON_MSG} ${ARGN})
    else()
      # Neither QUIET nor REQUIRED: emit a plain message that still lets
      # configuration continue and generation proceed.
      message(
        "-- Failed to find Accelerate framework with sparse solvers - "
        ${REASON_MSG} ${ARGN})
    endif()
  endif()

  return()
endmacro()
# Clear any previous result before searching.
unset(AccelerateSparse_FOUND)

# Locate the directory that contains the framework's umbrella header.
# The path is quoted in the EXISTS test so that an empty cache value (for
# example -DAccelerateSparse_INCLUDE_DIR= on the command line) yields the
# regular "not found" report instead of a malformed if() expression.
find_path(AccelerateSparse_INCLUDE_DIR NAMES Accelerate.h)
if (NOT AccelerateSparse_INCLUDE_DIR OR
    NOT EXISTS "${AccelerateSparse_INCLUDE_DIR}")
  accelerate_sparse_report_not_found(
    "Could not find Accelerate framework headers. Set "
    "AccelerateSparse_INCLUDE_DIR to the directory containing Accelerate.h")
endif()

# Locate the framework itself; same quoting rationale as above.
find_library(AccelerateSparse_LIBRARY NAMES Accelerate)
if (NOT AccelerateSparse_LIBRARY OR
    NOT EXISTS "${AccelerateSparse_LIBRARY}")
  accelerate_sparse_report_not_found(
    "Could not find Accelerate framework. Set AccelerateSparse_LIBRARY "
    "to the Accelerate.framework directory")
endif()

# Both pieces exist. The failure macro unsets this again (and leaves the
# module) if a later check fails.
set(AccelerateSparse_FOUND TRUE)
# Determine if the Accelerate framework detected includes the sparse solvers.
# The compile test is skipped for LLVM Clang due to compatibility issues with
# the Apple SDK headers; in that case the solvers are assumed to be available.
# Only the compiler ID is tested: CMAKE_CXX_COMPILER_VERSION holds a numeric
# version and never contains "Apple", so the former extra clause was always
# true and has been dropped.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  set(ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER TRUE)
  message(STATUS "Skipping Accelerate framework sparse solver test for LLVM Clang")
else()
  # For other compilers, perform the test.
  include(CheckCXXSourceCompiles)
  include(CMakePushCheckState)
  # A find module runs in the caller's scope. Save the caller's
  # CMAKE_REQUIRED_* settings and restore them afterwards instead of
  # unsetting them, which would discard whatever the caller had configured.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_INCLUDES ${AccelerateSparse_INCLUDE_DIR})
  set(CMAKE_REQUIRED_LIBRARIES ${AccelerateSparse_LIBRARY})
  check_cxx_source_compiles(
"#include <Accelerate.h>
int main() {
SparseMatrix_Double A;
SparseFactor(SparseFactorizationCholesky, A);
return 0;
}"
    ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER)
  cmake_pop_check_state()
  if (NOT ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER)
    accelerate_sparse_report_not_found(
      "Detected Accelerate framework: ${AccelerateSparse_LIBRARY} does not "
      "include the sparse solvers.")
  endif()
endif()
include(FindPackageHandleStandardArgs)

# Publish the plural, caller-facing variables from the singular cache entries
# once the framework has been found.
if (AccelerateSparse_FOUND)
  set(AccelerateSparse_INCLUDE_DIRS ${AccelerateSparse_INCLUDE_DIR})
  set(AccelerateSparse_LIBRARIES ${AccelerateSparse_LIBRARY})
endif()

# Let the standard helper handle the REQUIRED / QUIET arguments and set the
# final AccelerateSparse_FOUND from the two public variables.
find_package_handle_standard_args(
  AccelerateSparse
  REQUIRED_VARS
    AccelerateSparse_INCLUDE_DIRS
    AccelerateSparse_LIBRARIES)

# On success, move the raw search entries back to the advanced GUI view.
if (AccelerateSparse_FOUND)
  mark_as_advanced(FORCE
    AccelerateSparse_INCLUDE_DIR
    AccelerateSparse_LIBRARY)
endif()
Loading