diff --git a/CHANGELOG b/CHANGELOG
index 4798ab4bd..da32b1b87 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,8 @@ If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).
 
 Master, using release name V 2.4.0 (2/11/25)
 
+* Removed FINUFFT_CUDA_ARCHITECTURES flag, as it was unnecessary duplication.
+* Enabled LTO for finufft, nvcc support is flaky at the moment.
 * Added GPU spread interp only test. Added CPU spread interp only test to cmake
 * Make attributes private in Python Plan classes and allow read-only access to
   them using properties (Andén #608).
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a9d93d93..8d21cfc3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,8 +25,6 @@ option(FINUFFT_BUILD_DOCS "Whether to build the FINUFFT documentation" OFF)
 # if FINUFFT_USE_DUCC0 is ON, the following options are ignored
 set(FINUFFT_FFTW_LIBRARIES "DEFAULT" CACHE STRING "Specify a custom FFTW library")
 set(FINUFFT_FFTW_SUFFIX "OpenMP" CACHE STRING "Suffix for FFTW libraries (e.g. OpenMP, Threads etc.)")
-# if FINUFFT_USE_CUDA is OFF, the following options are ignored
-set(FINUFFT_CUDA_ARCHITECTURES "native" CACHE STRING "CUDA architectures to build for (e.g. 60;70;75;)")
 # if FINUFFT_USE_CPU is OFF, the following options are ignored
 set(FINUFFT_ARCH_FLAGS "native" CACHE STRING "Compiler flags for specifying target architecture, defaults to -march=native")
 # sphinx tag (don't remove): @cmake_opts_end
@@ -77,7 +75,8 @@ set(FINUFFT_CXX_FLAGS_DEBUG
     -ggdb
     -ggdb3
     -Wall
-    -Wno-sign-compare
+    -Wextra
+    -Wpedantic
     -Wno-unknown-pragmas
 )
 
@@ -208,7 +207,12 @@ function(finufft_link_test target)
     endif()
     enable_asan(${target})
     target_compile_features(${target} PRIVATE cxx_std_17)
-    set_target_properties(${target} PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
+    set_target_properties(
+        ${target}
+        PROPERTIES
+            MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
+            POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}
+    )
     # disable deprecated warnings for tests if supported
     if(FINUFFT_HAS_NO_DEPRECATED_DECLARATIONS)
         target_compile_options(${target} PRIVATE -Wno-deprecated-declarations)
@@ -277,16 +281,16 @@ if(FINUFFT_USE_CPU)
 endif()
 
 if(FINUFFT_USE_CUDA)
-    if(NOT DEFINED FINUFFT_CUDA_ARCHITECTURES)
-        if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-            set(FINUFFT_CUDA_ARCHITECTURES "{$CMAKE_CUDA_ARCHITECTURES}")
-        else()
-            message(
-                "FINUFFT WARNING: No CUDA architecture supplied via '-DFINUFFT_CUDA_ARCHITECTURES=...', defaulting to 'native'"
-            )
-            message("See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply.")
-        endif()
+    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+        message(
+            WARNING
+            "FINUFFT WARNING: No CUDA architecture supplied via '-DCMAKE_CUDA_ARCHITECTURES=...', defaulting to 'native'"
+        )
+        message(WARNING "See: https://developer.nvidia.com/cuda-gpus for more details on what architecture to supply.")
+        # Apply the default only here: an unconditional set() after endif() would
+        # shadow a user-supplied -DCMAKE_CUDA_ARCHITECTURES=... cache value.
+        set(CMAKE_CUDA_ARCHITECTURES "native")
     endif()
     enable_language(CUDA)
     find_package(CUDAToolkit REQUIRED)
     add_subdirectory(src/cuda)
@@ -348,6 +352,7 @@ message(STATUS " FINUFFT_FFTW_SUFFIX: ${FINUFFT_FFTW_SUFFIX}")
 message(STATUS " FINUFFT_FFTW_LIBRARIES: ${FINUFFT_FFTW_LIBRARIES}")
message(STATUS " FINUFFT_ARCH_FLAGS: ${FINUFFT_ARCH_FLAGS}") message(STATUS " FINUFFT_USE_DUCC0: ${FINUFFT_USE_DUCC0}") +message(STATUS " CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}") # gersemi: on if(FINUFFT_ENABLE_INSTALL) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index bcbbb5169..b5571605f 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -75,3 +75,14 @@ function(copy_dll source_target destination_target) unset(SOURCE_FILE) unset(DESTINATION_FILE) endfunction() + +include(CheckIPOSupported) +check_ipo_supported(RESULT LTO_SUPPORTED OUTPUT LTO_ERROR) + +if(LTO_SUPPORTED) + message(STATUS "LTO is supported and enabled.") + set(FINUFFT_INTERPROCEDURAL_OPTIMIZATION TRUE) +else() + message(WARNING "LTO is not supported: ${LTO_ERROR}") + set(FINUFFT_INTERPROCEDURAL_OPTIMIZATION FALSE) +endif() diff --git a/docs/install_gpu.rst b/docs/install_gpu.rst index ec6a74b60..0c5b4f312 100644 --- a/docs/install_gpu.rst +++ b/docs/install_gpu.rst @@ -45,6 +45,8 @@ To find out your own device's compute capability without having to look it up on This will return a text string such as ``8.6`` which would incidate ``sm_86`` architecture, thus to use ``CMAKE_CUDA_ARCHITECTURES=86``. +Note that by default the ``CMAKE_CUDA_ARCHITECTURES`` flag is set to ``native``, which means that the code will be compiled for the compute capability of the GPU on which the code is being compiled. +This might not be portable so it is recommended to set this flag explicitly when building for multiple systems. A good alternative is ``all-major`` which will compile for all major compute capabilities. 
Testing ------- diff --git a/perftest/cuda/CMakeLists.txt b/perftest/cuda/CMakeLists.txt index fdf04e723..6f440ed35 100644 --- a/perftest/cuda/CMakeLists.txt +++ b/perftest/cuda/CMakeLists.txt @@ -7,7 +7,7 @@ set_target_properties( cuperftest PROPERTIES LINKER_LANGUAGE CUDA - CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}" + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON ) diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index eccebb17a..83eba1c96 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -34,18 +34,14 @@ set(FINUFFT_CUDA_FLAGS -fmad=true -restrict --extra-device-vectorization - $<$:-G - -maxrregcount - 64 - > - > + -Xnvlink + --strip-all> ) if(FINUFFT_SHARED_LINKING) add_library(cufinufft SHARED ${PRECISION_INDEPENDENT_SRC} ${PRECISION_DEPENDENT_SRC}) else() add_library(cufinufft STATIC ${PRECISION_INDEPENDENT_SRC} ${PRECISION_DEPENDENT_SRC}) - set_target_properties(cufinufft PROPERTIES POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}) endif() target_include_directories(cufinufft PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) # set target build location @@ -54,20 +50,28 @@ set_target_properties(cufinufft PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_B set_target_properties( cufinufft PROPERTIES - CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}" + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CUDA_SEPARABLE_COMPILATION ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON WINDOWS_EXPORT_ALL_SYMBOLS ON ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" + INTERPROCEDURAL_OPTIMIZATION + OFF # LTO is not supported for CUDA for now + POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE} ) target_compile_features(cufinufft PRIVATE cxx_std_17) target_compile_options(cufinufft PRIVATE ${FINUFFT_CUDA_FLAGS}) -if(WIN32 OR (BUILD_TESTING AND FINUFFT_BUILD_TESTS)) +if(WIN32 OR (BUILD_TESTING AND FINUFFT_BUILD_TESTS) OR env{CIBUILDWHEEL}) target_link_libraries(cufinufft PUBLIC 
CUDA::cudart CUDA::cufft) else() target_link_libraries(cufinufft PUBLIC CUDA::cudart_static CUDA::cufft_static) endif() +# disable deprecated warnings for tests if supported +if(FINUFFT_HAS_NO_DEPRECATED_DECLARATIONS) + target_compile_options(cufinufft PRIVATE $<$:-Wno-deprecated-declarations>) +endif() + file(GLOB CUFINUFFT_PUBLIC_HEADERS "${CMAKE_SOURCE_DIR}/include/cufinufft*.h") set_target_properties(cufinufft PROPERTIES PUBLIC_HEADER "${CUFINUFFT_PUBLIC_HEADERS}") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 71b70ef3c..7f2184fec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,7 +40,6 @@ if(NOT FINUFFT_USE_DUCC0) add_executable(fftw_lock_test fftw_lock_test.cpp) target_compile_features(fftw_lock_test PRIVATE cxx_std_17) finufft_link_test(fftw_lock_test) - add_test(NAME run_fftw_lock_test COMMAND fftw_lock_test WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) endif() diff --git a/test/cuda/CMakeLists.txt b/test/cuda/CMakeLists.txt index 01de47cc4..3e1bada51 100644 --- a/test/cuda/CMakeLists.txt +++ b/test/cuda/CMakeLists.txt @@ -13,12 +13,12 @@ foreach(srcfile ${test_src}) target_compile_features(${executable} PUBLIC cxx_std_17) set_target_properties( ${executable} - PROPERTIES LINKER_LANGUAGE CUDA CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}" + PROPERTIES LINKER_LANGUAGE CUDA CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" ) message( STATUS "Adding test ${executable}" - " with CUDA_ARCHITECTURES=${FINUFFT_CUDA_ARCHITECTURES}" + " with CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" " and INCLUDE=${CUFINUFFT_INCLUDE_DIRS}" ) endforeach()