|
| 1 | +# See LICENSE_ENZO file for license and copyright information |
| 2 | + |
| 3 | +#.rst: |
| 4 | +# EnableFPOptimizations |
| 5 | +# --------------------- |
| 6 | +# |
| 7 | +# Defines a function that enables value-unsafe floating point optimization |
| 8 | +# flags for the C and C++ compilers. |
| 9 | +# |
| 10 | +# This can also add the openmp-simd flags to the C and C++ compilers (and any |
| 11 | +# other flags necessary for simd optimizations). |
| 12 | + |
# Include guard: the first inclusion records a marker variable; any later
# inclusion (in a scope where the marker is visible) returns immediately so
# that the function below is only defined once.
if(NOT __enableFPOptimizations)
  set(__enableFPOptimizations YES)
else()
  return()
endif()
| 17 | + |
| 18 | +# Function 'enableFPOptimizations' is used to add C and C++ flags to the |
| 19 | +# various build types that compile the program with value unsafe floating point |
| 20 | +# optimizations. |
| 21 | +# |
| 22 | +# ARGUMENTS |
| 23 | +# --------- |
| 24 | +# USE_SIMD |
| 25 | +# When this is passed a TRUE value, this function also adds C and C++ compiler |
| 26 | +# flags to enable OpenMP's SIMD directives |
| 27 | +# - For clang, gnu and intel compilers this does NOT enable other OpenMP |
| 28 | +# directives and should not link the openmp runtime library |
| 29 | +# |
| 30 | +# NOTES |
| 31 | +# ----- |
| 32 | +# This function assumes that the global CONFIG_ARCH_FLAGS variable has been |
| 33 | +# defined by the machine configuration file (this flag should inform the |
| 34 | +# compiler about the architecture of the CPU where the code will be executed, |
| 35 | +# which lets the compiler choose optimal instructions). If this variable has |
| 36 | +# not been defined, the function will produce an error and provide the user with |
| 37 | +# a sensible default that they may want to use. |
| 38 | +# |
| 39 | +# This function may have less impact on the performance of programs compiled by |
| 40 | +# the Intel compiler compared to code compiled by other compilers. This is |
| 41 | +# because the Intel compilers enable some of these options by default (this |
| 42 | +# contributes strongly to the Intel Compiler's reputation for producing faster code) |
function(enableFPOptimizations USE_SIMD)
  # Appends value-unsafe floating point optimization flags (plus the contents
  # of CONFIG_ARCH_FLAGS) to CMAKE_<LANG>_FLAGS_<CONFIG> in the caller's scope,
  # for LANG in {C, CXX} and CONFIG in {DEBUG, RELEASE, MINSIZEREL,
  # RELWITHDEBINFO}.
  #
  # USE_SIMD: when this evaluates to TRUE, the compiler-specific OpenMP-SIMD
  #           flag is appended as well.
  #
  # Raises a FATAL_ERROR when the C++ compiler is Clang (untested), when the
  # compiler is not one of GNU/Intel, or when CONFIG_ARCH_FLAGS is not defined.

  # ToDo: check assumption that CMAKE_C_COMPILER_ID and CMAKE_CXX_COMPILER_ID
  #       are equal to each other...

  # First, determine the values for the following variables based on the
  # compiler type:
  #   - DFLT_HOSTARCHFLAG:
  #       * this stores the flag telling the compiler that it can assume that
  #         the CPU architecture of the machine currently performing the
  #         compilation is identical to that of the machine where the compiled
  #         program is run
  #       * this variable is only used to describe a potential default value
  #         in the error message raised when the CONFIG_ARCH_FLAGS global
  #         variable was not defined in the machine configuration file.
  #   - SPEED_FLAGS:
  #       * Specifies flags that prioritize code speed over code size.
  #       * Only the "RELEASE" and "RELWITHDEBINFO" build types will make use
  #         of these flags
  #   - FP_FLAGS:
  #       * Specifies flags enabling value-unsafe floating point optimizations.
  #   - OMPSIMD_FLAGS:
  #       * Specifies flags enabling OpenMP's SIMD directives

  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    # theoretically includes Clang and AppleClang

    set(DFLT_HOSTARCHFLAG "-march=native")
    set(SPEED_FLAGS "-O3 -funroll-loops")
    # value-unsafe FP optimizations (previously this mistakenly duplicated the
    # OpenMP-SIMD flag)
    set(FP_FLAGS "-ffast-math")
    set(OMPSIMD_FLAGS "-fopenmp-simd")

    # The Clang settings above are retained for when support is validated;
    # until then, refuse to configure.
    message(FATAL_ERROR "enableFPOptimizations is untested for Clang.")

  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")

    # in case we ever want to support a version with OpenMP-SIMD, that does not
    # enable value-unsafe optimizations of floating-point operations, try the
    # following flags:
    #   - "-fopenmp-simd -funroll-loops -fno-trapping-math -fno-signed-zeros"
    #   - if we also include -fno-math-errno, it will cause 1D MHD shock tube
    #     problems to have slightly different L1 error norms for the VL+CT
    #     solver, depending on the position in the transverse direction
    #     (see MHD_shock_tube_test)

    set(DFLT_HOSTARCHFLAG "-march=native")
    set(SPEED_FLAGS "-O3 -funroll-loops")
    set(FP_FLAGS "-ffast-math")
    set(OMPSIMD_FLAGS "-fopenmp-simd")

  elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
    # this matches both the classic Intel compilers and the LLVM-based Intel
    # compilers. For CMake >= 3.20, CMAKE_CXX_COMPILER_ID is "IntelLLVM" for
    # the latter (the regex match on "Intel" continues to cover both)

    # technically -qopenmp-simd is already enabled by default when compiling
    # with -O2 or higher
    set(DFLT_HOSTARCHFLAG "-xHost")
    set(SPEED_FLAGS "-O3")
    # no extra FP flag is added for Intel
    set(FP_FLAGS "")
    set(OMPSIMD_FLAGS "-qopenmp-simd")

  else()
    message(FATAL_ERROR
      "OpenMP-SIMD handling is not yet implemented for the "
      "${CMAKE_CXX_COMPILER_ID} compiler."
    )
  endif()

  # Second, check that the CONFIG_ARCH_FLAGS variable is defined. The message
  # suggests DFLT_HOSTARCHFLAG (chosen above) as a possible value.
  if(NOT DEFINED CONFIG_ARCH_FLAGS)
    message(FATAL_ERROR
      "The CONFIG_ARCH_FLAGS variable is not defined.\n"
      "This variable is strongly-recommended while compiling with OpenMP-SIMD, in order to inform the compiler which vector instructions are available/prefered.\n"
      "This variable should be defined in the machine config file.\n"
      "A reasonable default value for this variable, when using the ${CMAKE_CXX_COMPILER_ID} compiler, might be \"${DFLT_HOSTARCHFLAG}\" (this tells the compiler to target the CPU architecture of the machine that is performing the compilation)."
    )
  endif()

  # Finally, update the flags
  #   - we NEED to update CMAKE_<LANG>_FLAGS_<CONFIG>
  #   - if we update CMAKE_<LANG>_FLAGS the optimization level flags in
  #     CMAKE_<LANG>_FLAGS_<CONFIG> get precedence. This is specifically an
  #     issue for RelWithDebInfo builds
  #   - we could theoretically use add_compile_options(), but that would also
  #     pass these flags to the FORTRAN compiler
  foreach(BTYPE IN ITEMS "DEBUG" "RELEASE" "MINSIZEREL" "RELWITHDEBINFO")
    foreach(LANG IN ITEMS "C" "CXX")

      # make a copy of global variable to be modified (quoted so that the
      # value is always treated as a single string)
      set(localCopy "${CMAKE_${LANG}_FLAGS_${BTYPE}}")

      # append the new flags to the local copy
      string(APPEND localCopy " ${FP_FLAGS} ${CONFIG_ARCH_FLAGS}")

      if(USE_SIMD)
        string(APPEND localCopy " ${OMPSIMD_FLAGS}")
      endif()

      if((BTYPE STREQUAL "RELEASE") OR (BTYPE STREQUAL "RELWITHDEBINFO"))
        # append flags that prioritize speed (at expense of size); these come
        # last on the flag string, after the build type's existing flags
        string(APPEND localCopy " ${SPEED_FLAGS}")
      endif()

      # overwrite the variable in the caller's scope
      set(CMAKE_${LANG}_FLAGS_${BTYPE} "${localCopy}" PARENT_SCOPE)

    endforeach()
  endforeach()
endfunction()
| 154 | + |
| 155 | +# REQUIRE TARGET_ARCH_FLAGS for gcc compiler... |
0 commit comments