diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 85bbaeaf2..2db7f2820 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -84,7 +84,8 @@ jobs: displayName: 'Run checks' - job: compilation_default - displayName: 'Compilation Default GCC 9' + # ubuntu-latest: ubuntu-22.04 w/ GCC 11 + displayName: 'Compilation Default GCC' steps: - template: .azure-pipelines/install-template.yml parameters: @@ -95,14 +96,14 @@ jobs: CUDA: false BUILD: true -- job: compilation_default_gcc7 - displayName: 'Compilation Default GCC 7' +- job: compilation_default_gcc9 + displayName: 'Compilation Default GCC 9' pool: - vmImage: 'ubuntu-18.04' + vmImage: 'ubuntu-20.04' variables: - CC: gcc-7 - CXX: g++-7 - FC: gfortran-7 + CC: gcc-9 + CXX: g++-9 + FC: gfortran-9 steps: - template: .azure-pipelines/install-template.yml parameters: @@ -131,27 +132,33 @@ jobs: CUDA: false BUILD: true -- job: compilation_CUDA10 - displayName: 'Compilation CUDA 10' +- job: compilation_CUDA11_gcc9 + displayName: 'Compilation CUDA 11 GCC 9' pool: - vmImage: 'ubuntu-18.04' + vmImage: 'ubuntu-20.04' variables: - CC: gcc-7 - CXX: g++-7 - FC: gfortran-7 + CC: gcc-9 + CXX: g++-9 + FC: gfortran-9 steps: - template: .azure-pipelines/install-template.yml parameters: CUDA: true - CUDA_V: '10.2' + CUDA_V: '11.4' - template: .azure-pipelines/configure-template.yml parameters: TESTFLAGS: '--with-mpi --with-cuda=cuda10' CUDA: true BUILD: true -- job: compilation_CUDA11 - displayName: 'Compilation CUDA 11' +- job: compilation_CUDA11_gcc10 + displayName: 'Compilation CUDA 11 GCC 10' + pool: + vmImage: 'ubuntu-20.04' + variables: + CC: gcc-10 + CXX: g++-10 + FC: gfortran-10 steps: - template: .azure-pipelines/install-template.yml parameters: @@ -163,6 +170,38 @@ jobs: CUDA: true BUILD: true +- job: compilation_CUDA12 + displayName: 'Compilation CUDA 12 GCC 10' + pool: + vmImage: 'ubuntu-20.04' + variables: + CC: gcc-10 + CXX: g++-10 + FC: gfortran-10 + steps: + - template: .azure-pipelines/install-template.yml + parameters: + CUDA: true + CUDA_V: '12.1' + - template: .azure-pipelines/configure-template.yml + parameters: + TESTFLAGS: '--with-mpi --with-cuda=cuda11' + CUDA: true + BUILD: true + +- job: compilation_CUDA12_latest + displayName: 'Compilation CUDA 12 latest' + steps: + - template: .azure-pipelines/install-template.yml + parameters: + CUDA: true + CUDA_V: '12.1' + - template: .azure-pipelines/configure-template.yml + parameters: + TESTFLAGS: '--with-mpi --with-cuda=cuda12' + CUDA: true + BUILD: true + - job: test_example_1 displayName: 'Test example 1 - simple_topography_and_also_a_simple_fluid_layer' dependsOn: compilation_default diff --git a/.azure-pipelines/install-template.yml b/.azure-pipelines/install-template.yml index 3d5617472..a64f71681 100644 --- a/.azure-pipelines/install-template.yml +++ b/.azure-pipelines/install-template.yml @@ -4,8 +4,10 @@ # software setup on VM nodes # ubuntu-18.04: # https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu1804-README.md -# ubuntu-20.04 "ubuntu-latest": +# ubuntu-20.04: # https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-README.md +# ubuntu-22.04 "ubuntu-latest": +# https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md # parameters: - name: CUDA @@ -20,10 +22,27 @@ steps: # fortran/openMPI compiler echo "CC: ${CC} CXX: ${CXX} FC: ${FC}" # updates repository + echo; echo `uname -a`; lsb_release -a; echo sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 6B05F25D762E3157 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 78BD65473CB3BD13 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 762E3157 - if [ "${FC}" == "gfortran-10" ]; then + if [ "${FC}" == "gfortran-9" ]; then + echo "gfortran: gfortran-9 update" + # updating gfortran version + sudo add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt-get update + sudo apt-get install -y --reinstall gcc-9 g++-9 gfortran-9 + # updates alternatives + echo + update-alternatives --query gfortran + echo + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100 + sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-9 100 + echo + update-alternatives --query gfortran + echo + elif [ "${FC}" == "gfortran-10" ]; then echo "gfortran: gfortran-10 update" # updating gfortran version sudo add-apt-repository ppa:ubuntu-toolchain-r/test @@ -62,6 +81,8 @@ steps: echo echo "numpy version : " python -c "import numpy; print(numpy.__version__)" + # checks exit code + if [[ $? -ne 0 ]]; then exit 1; fi echo # version infos @@ -92,9 +113,14 @@ steps: ## distribution xenial: from ubuntu 16.04 #UBUNTU_VERSION=ubuntu1604 ## distribution bionic: from ubuntu 18.04 - UBUNTU_VERSION=ubuntu1804 + #UBUNTU_VERSION=ubuntu1804 ## distribution focal: from ubuntu 20.04 #UBUNTU_VERSION=ubuntu2004 + ## distribution jammy: from ubuntu 22.04 + #UBUNTU_VERSION=ubuntu2204 + + # default + UBUNTU_VERSION=ubuntu2004 # CUDA_VERSION - specifies CUDA toolkit version echo "CUDA version: $CUDA_V" @@ -108,10 +134,20 @@ steps: elif [ "$CUDA_V" == "10.2" ]; then ## bionic CUDA_VERSION=10.2.89-1 + elif [ "$CUDA_V" == "11.4" ]; then + ## focal + CUDA_VERSION=11.4.0-1 + elif [ "$CUDA_V" == "12.1" ]; then + ## focal + CUDA_VERSION=12.1.1-1 else - # note: on azure VM nodes with ubuntu-latest, default gcc version is 9.3; + # note: - on azure VM nodes with ubuntu 18.04, default gcc version is 9.3; + # needs at least CUDA version 10.x + # - on azure VM nodes with ubuntu 20.04, default gcc version is 10.3; # needs at least CUDA version 11.x - CUDA_VERSION=11.4.0-1 + # - on azure VM nodes with ubuntu-latest (22.04), default gcc version is 11.3; + # needs at least CUDA version 11.7 + CUDA_VERSION=12.1.1-1 fi echo @@ -149,7 +185,12 @@ steps: echo # gets repo - if [ "${CUDA_VERSION}" == "11.4.0-1" ]; then + if [ "${CUDA_VERSION}" == "10.2.89-1" ]; then + # gets packages + INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA_VERSION}_${CUDA_ARCH}.deb + wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/${INSTALLER} + sudo dpkg -i ${INSTALLER} + elif [ "${CUDA_VERSION}" == "11.4.0-1" ]; then # new CUDA version 11.4 has no cuda-repo-** file, following instructions from website, # see: https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=18.04&target_type=deb_network wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/cuda-${UBUNTU_VERSION}.pin @@ -158,10 +199,12 @@ steps: # adds repo sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/ /" else - # gets packages - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA_VERSION}_${CUDA_ARCH}.deb - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/${INSTALLER} - sudo dpkg -i ${INSTALLER} + # new versions + wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/cuda-${UBUNTU_VERSION}.pin + sudo mv cuda-${UBUNTU_VERSION}.pin /etc/apt/preferences.d/cuda-repository-pin-600 + echo + # adds repo + sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/${CUDA_OS}/ /" fi #echo diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 87b70d9e8..39459f2a9 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check for changes id: diff @@ -29,9 +29,20 @@ jobs: export DIFF=$( git diff --name-only ${{ github.event.before }} $GITHUB_SHA ) echo " diff between ${{ github.event.before }} and $GITHUB_SHA" fi - echo "$DIFF" + echo "***"; echo "$DIFF"; echo "***" # Escape newlines (replace \n with %0A) - echo "::set-output name=diff::$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )" + # deprecated: + #echo "::set-output name=diff::$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )" + # new: + # replace new line with %0A - will result finding only one file with a very long name... + #echo "diff=$( echo "$DIFF" | sed ':a;N;$!ba;s/\n/%0A/g' )" >> $GITHUB_OUTPUT + # doesn't work... + #echo "diff=\"$DIFF\"" >> "$GITHUB_OUTPUT" + # new multi-line format: + # (https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#multiline-strings) + echo "diff<> $GITHUB_OUTPUT + echo "$DIFF" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT - name: Output changes run: echo "${{ steps.diff.outputs.diff }}" @@ -75,7 +86,7 @@ jobs: needs: changesCheck steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install dependencies run: | @@ -125,10 +136,10 @@ jobs: strategy: matrix: - os: [ubuntu-latest,ubuntu-18.04] + os: [ubuntu-latest,ubuntu-20.04] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -147,11 +158,11 @@ jobs: needs: changesCheck steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Cache Intel oneapi packages id: cache-intel-oneapi - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /opt/intel/oneapi key: install-${{ runner.os }}-all @@ -242,7 +253,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -260,7 +271,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -284,7 +295,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -309,7 +320,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -334,7 +345,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -358,7 +369,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -382,7 +393,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -407,7 +418,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -431,7 +442,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -455,7 +466,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -479,7 +490,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh @@ -504,7 +515,7 @@ jobs: needs: [linuxCheck] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: ./.github/scripts/run_install.sh diff --git a/EXAMPLES/tomographic_ocean_model/README b/EXAMPLES/tomographic_ocean_model/README index dc43c7995..4d8c0ffec 100644 --- a/EXAMPLES/tomographic_ocean_model/README +++ b/EXAMPLES/tomographic_ocean_model/README @@ -7,6 +7,14 @@ with topography variations. The tomographic file will be created by script createTomographyFile.py provided in utils/ directory. +The default tomography file will provide Vp, Vs and density on a data line like: + #x #z #vp #vs #rho +Optionally, attenuation Q-values can be added to these data lines, like: + #x #z #vp #vs #rho #Qp #Qs +to specify a 2D velocity and attenuation model. +You could for example modify the script createTomographyFile.py accordingly. + + To run this example, type: ./run_this_example.sh diff --git a/Makefile.in b/Makefile.in index 7193f11fe..bfd35256c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,6 +145,9 @@ SCOTCH_LIBDIR = @SCOTCH_LIBDIR@ @COND_CUDA11_TRUE@CUDA11 = yes @COND_CUDA11_FALSE@CUDA11 = no +@COND_CUDA12_TRUE@CUDA12 = yes +@COND_CUDA12_FALSE@CUDA12 = no + # CUDA compilation with linking @COND_CUDA_PLUS_TRUE@CUDA_PLUS = yes @COND_CUDA_PLUS_FALSE@CUDA_PLUS = no @@ -173,18 +176,22 @@ CUDA_LINK = @CUDA_LDFLAGS@ @CUDA_LIBS@ -lstdc++ # Volta (cuda9, V100): -gencode=arch=compute_70,code=sm_70 # Turing (cuda10, T4, GeForce RTX 2080): -gencode=arch=compute_75,code=sm_75 # Ampere (cuda11, A100, GeForce RTX 3080): -gencode=arch=compute_80,code=sm_80 - +# Hopper (cuda12, H100): -gencode=arch=compute_90,code=sm_90 GENCODE_20 = -gencode=arch=compute_20,code=\"sm_20,compute_20\" GENCODE_30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\" GENCODE_35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\" GENCODE_37 = -gencode=arch=compute_37,code=\"sm_37\" GENCODE_50 = -gencode=arch=compute_50,code=\"sm_50\" +GENCODE_52 = -gencode=arch=compute_52,code=\"sm_52,compute_52\" GENCODE_60 = -gencode=arch=compute_60,code=\"sm_60,compute_60\" GENCODE_70 = -gencode=arch=compute_70,code=\"sm_70,compute_70\" GENCODE_75 = -gencode=arch=compute_75,code=\"sm_75,compute_75\" GENCODE_80 = -gencode=arch=compute_80,code=\"sm_80,compute_80\" +GENCODE_90 = -gencode=arch=compute_90,code=\"sm_90,compute_90\" # cuda preprocessor flag +# CUDA version 12.0 +@COND_CUDA_TRUE@@COND_CUDA12_TRUE@GENCODE = $(GENCODE_90) $(FC_DEFINE)GPU_DEVICE_Hopper # CUDA version 11.0 @COND_CUDA_TRUE@@COND_CUDA11_TRUE@GENCODE = $(GENCODE_80) $(FC_DEFINE)GPU_DEVICE_Ampere # CUDA version 10.0 @@ -194,7 +201,7 @@ GENCODE_80 = -gencode=arch=compute_80,code=\"sm_80,compute_80\" # CUDA version 8.0 @COND_CUDA_TRUE@@COND_CUDA8_TRUE@GENCODE = $(GENCODE_60) $(FC_DEFINE)GPU_DEVICE_Pascal # CUDA version 7.x -@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_50) $(FC_DEFINE)GPU_DEVICE_Maxwell +@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_52) $(FC_DEFINE)GPU_DEVICE_Maxwell # CUDA version 6.5 @COND_CUDA_TRUE@@COND_CUDA6_TRUE@GENCODE = $(GENCODE_37) $(FC_DEFINE)GPU_DEVICE_K80 # CUDA version 5.x @@ -216,6 +223,20 @@ GENCODE_80 = -gencode=arch=compute_80,code=\"sm_80,compute_80\" @COND_CUDA_FALSE@NVCC_FLAGS = $(MPI_INCLUDES) $(COND_MPI_CPPFLAGS) @COND_CUDA_FALSE@NVCCLINK = $(NVCC) $(NVCC_FLAGS) +####################################### +#### +#### OpenMP +#### with configure: ./configure --enable-openmp OMP_FCFLAGS=".." OMP_LIB=.. +#### +####################################### + +@COND_OMP_TRUE@OPENMP = yes +@COND_OMP_FALSE@OPENMP = no + +@COND_OMP_TRUE@FCFLAGS += $(FC_DEFINE)USE_OPENMP @OMP_FCFLAGS@ + +@COND_OMP_TRUE@OMP_LIBS = $(OMP_LIB) +@COND_OMP_FALSE@OMP_LIBS = ####################################### #### diff --git a/configure b/configure index ed705592c..1ff46dd30 100755 --- a/configure +++ b/configure @@ -652,6 +652,8 @@ LIBOBJS GIT_DATE_VERSION GIT_COMMIT_VERSION GIT_PACKAGE_VERSION +OMP_LIB +OMP_FCFLAGS CUDA_LIBS CUDA_LDFLAGS CUDA_CPPFLAGS @@ -709,8 +711,12 @@ ac_ct_FC LDFLAGS FCFLAGS FC +COND_OMP_FALSE +COND_OMP_TRUE COND_CUDA_PLUS_FALSE COND_CUDA_PLUS_TRUE +COND_CUDA12_FALSE +COND_CUDA12_TRUE COND_CUDA11_FALSE COND_CUDA11_TRUE COND_CUDA10_FALSE @@ -789,6 +795,8 @@ enable_double_precision enable_debug with_mpi with_cuda +with_openmp +enable_openmp with_scotch_dir with_scotch_includedir with_scotch_libdir @@ -823,7 +831,9 @@ NVCC CUDA_FLAGS CUDA_INC CUDA_LIB -CPP' +CPP +OMP_FCFLAGS +OMP_LIB' # Initialize some variables set by options. @@ -1449,12 +1459,14 @@ Optional Features: --enable-double-precision solver in double precision [default=no] --enable-debug build with debugging options enabled [default=no] + --enable-openmp build OpenMP enabled version [default=no] Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-mpi build parallel version [default=no] --with-cuda build CUDA GPU enabled version [default=no] + --with-openmp build OpenMP enabled version [default=no] --with-scotch-dir=DIR define the root path to Scotch (e.g. /opt/scotch/) --with-scotch-includedir=DIR define the path to the Scotch headers (e.g. @@ -1503,6 +1515,8 @@ Some influential environment variables: CUDA_INC Location of CUDA include files CUDA_LIB Location of CUDA library libcudart CPP C preprocessor + OMP_FCFLAGS OpenMP Fortran compiler flags + OMP_LIB Location of extra OpenMP libraries Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. @@ -1710,39 +1724,6 @@ fi } # ac_fn_c_try_link -# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists and can be compiled using the include files in -# INCLUDES, setting the cache variable VAR accordingly. -ac_fn_c_check_header_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -printf %s "checking for $2... " >&6; } -if eval test \${$3+y} -then : - printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$3=yes" -else $as_nop - eval "$3=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -fi -eval ac_res=\$$3 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf "%s\n" "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile - # ac_fn_fc_try_link LINENO # ------------------------ # Try to link conftest.$ac_ext, and return whether this succeeded. @@ -1790,6 +1771,39 @@ fi } # ac_fn_fc_try_link +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + # ac_fn_cxx_try_compile LINENO # ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. @@ -3314,6 +3328,14 @@ else COND_CUDA11_FALSE= fi + if test x"$want_cuda" = xcuda12; then + COND_CUDA12_TRUE= + COND_CUDA12_FALSE='#' +else + COND_CUDA12_TRUE='#' + COND_CUDA12_FALSE= +fi + # cuda linking for cuda 5x and 6x and 7x and 8x and .. if test "$want_cuda" = cuda4 \ @@ -3324,6 +3346,7 @@ fi -o "$want_cuda" = cuda9 \ -o "$want_cuda" = cuda10 \ -o "$want_cuda" = cuda11 \ + -o "$want_cuda" = cuda12 \ ; then COND_CUDA_PLUS_TRUE= COND_CUDA_PLUS_FALSE='#' @@ -3333,6 +3356,42 @@ else fi +### +### OpenMP +### + + +# Check whether --with-openmp was given. +if test ${with_openmp+y} +then : + withval=$with_openmp; with_omp="$withval" +else $as_nop + with_omp=no +fi + +# Check whether --enable-openmp was given. +if test ${enable_openmp+y} +then : + enableval=$enable_openmp; enable_omp="$enableval" +else $as_nop + enable_omp=no +fi + +if test x"$enable_omp" != xno || test x"$with_omp" != xno +then : + want_omp=yes +else $as_nop + want_omp=no +fi + if test x"$want_omp" != xno; then + COND_OMP_TRUE= + COND_OMP_FALSE='#' +else + COND_OMP_TRUE='#' + COND_OMP_FALSE= +fi + + ############################################################ # Checks for programs. @@ -6390,73 +6449,6 @@ ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest ac_compiler_gnu=$ac_cv_fc_compiler_gnu -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -ac_header= ac_cache= -for ac_item in $ac_header_c_list -do - if test $ac_cache; then - ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" - if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then - printf "%s\n" "#define $ac_item 1" >> confdefs.h - fi - ac_header= ac_cache= - elif test $ac_header; then - ac_cache=$ac_item - else - ac_header=$ac_item - fi -done - - - - - - - - -if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes -then : - -printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h - -fi -ac_fn_c_check_header_compile "$LINENO" "emmintrin.h" "ac_cv_header_emmintrin_h" "$ac_includes_default" -if test "x$ac_cv_header_emmintrin_h" = xyes -then : - -printf "%s\n" "#define HAVE_EMMINTRIN 1" >>confdefs.h - -fi - -ac_fn_c_check_header_compile "$LINENO" "xmmintrin.h" "ac_cv_header_xmmintrin_h" "$ac_includes_default" -if test "x$ac_cv_header_xmmintrin_h" = xyes -then : - -printf "%s\n" "#define HAVE_XMMINTRIN 1" >>confdefs.h - -fi - -# AIX doesn't have err.h so we need to check -ac_fn_c_check_header_compile "$LINENO" "err.h" "ac_cv_header_err_h" "$ac_includes_default" -if test "x$ac_cv_header_err_h" = xyes -then : - -printf "%s\n" "#define HAVE_ERR 1" >>confdefs.h - -fi - -ac_ext=${ac_fc_srcext-f} -ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' -ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_fc_compiler_gnu - - - ############################################################ #checks for Scotch @@ -6586,7 +6578,9 @@ printf "%s\n" "#define HAVE_SCOTCH 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using bundled scotch instead" >&5 printf "%s\n" "no, using bundled scotch instead" >&6; } - for ac_prog in flex lex + + + for ac_prog in flex lex do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -6761,58 +6755,7 @@ then : else $as_nop LEXLIB=$ac_cv_lib_lex fi - ac_save_LIBS="$LIBS" - LIBS= - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing yywrap" >&5 -printf %s "checking for library containing yywrap... " >&6; } -if test ${ac_cv_search_yywrap+y} -then : - printf %s "(cached) " >&6 -else $as_nop - ac_func_search_save_LIBS=$LIBS -cat > conftest.$ac_ext <<_ACEOF - program main - call yywrap - end -_ACEOF -for ac_lib in '' fl l -do - if test -z "$ac_lib"; then - ac_res="none required" - else - ac_res=-l$ac_lib - LIBS="-l$ac_lib $ac_func_search_save_LIBS" - fi - if ac_fn_fc_try_link "$LINENO" -then : - ac_cv_search_yywrap=$ac_res -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext - if test ${ac_cv_search_yywrap+y} -then : - break -fi -done -if test ${ac_cv_search_yywrap+y} -then : -else $as_nop - ac_cv_search_yywrap=no -fi -rm conftest.$ac_ext -LIBS=$ac_func_search_save_LIBS -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_yywrap" >&5 -printf "%s\n" "$ac_cv_search_yywrap" >&6; } -ac_res=$ac_cv_search_yywrap -if test "$ac_res" != no -then : - test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" - LEXLIB="$LIBS" -fi - - LIBS="$ac_save_LIBS" fi @@ -6853,9 +6796,11 @@ fi rm -f conftest.l $LEX_OUTPUT_ROOT.c fi + if test -z "$LEX" || test "X$LEX" = "Xno"; then as_fn_error $? "No suitable lex found" "$LINENO" 5 fi + for ac_prog in 'bison -y' byacc do # Extract the first word of "$ac_prog", so it can be a program name with args. @@ -6910,6 +6855,7 @@ test -n "$YACC" || YACC="yacc" + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -7579,6 +7525,73 @@ ac_empty="" +# Checks for header files. + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "emmintrin.h" "ac_cv_header_emmintrin_h" "$ac_includes_default" +if test "x$ac_cv_header_emmintrin_h" = xyes +then : + +printf "%s\n" "#define HAVE_EMMINTRIN 1" >>confdefs.h + +fi + +ac_fn_c_check_header_compile "$LINENO" "xmmintrin.h" "ac_cv_header_xmmintrin_h" "$ac_includes_default" +if test "x$ac_cv_header_xmmintrin_h" = xyes +then : + +printf "%s\n" "#define HAVE_XMMINTRIN 1" >>confdefs.h + +fi + +# AIX doesn't have err.h so we need to check +ac_fn_c_check_header_compile "$LINENO" "err.h" "ac_cv_header_err_h" "$ac_includes_default" +if test "x$ac_cv_header_err_h" = xyes +then : + +printf "%s\n" "#define HAVE_ERR 1" >>confdefs.h + +fi + +ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + # Checks for libraries. @@ -8593,6 +8606,96 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu +fi + +### +### OpenMP +### + +if test x"$want_omp" != xno +then : + + printf "%s\n" "## ------ ## +## OpenMP ## +## ------ ##" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: OpenMP compilation is enabled" >&5 +printf "%s\n" "$as_me: OpenMP compilation is enabled" >&6;} + + + + + # openmp checking + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: OpenMP flag $OMP_FCFLAGS" >&5 +printf "%s\n" "$as_me: OpenMP flag $OMP_FCFLAGS" >&6;} + +ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + +cit_fc_save_fc=$FC +cit_fc_save_fcflags=$FCFLAGS +FC=$FC +FCFLAGS="$FCFLAGS $OMP_FCFLAGS" + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP directives work" >&5 +printf %s "checking whether OpenMP directives work... " >&6; } + +#AC_COMPILE_IFELSE(_CIT_FC_TRIVIAL_OPENMP_PROGRAM, [ +# AC_MSG_RESULT(yes) +#], [ +# AC_MSG_RESULT(no) +# AC_MSG_FAILURE([cannot compile a trivial OpenMP program using $FC]) +#]) + +cat > conftest.$ac_ext <<_ACEOF + + program main + + implicit none + integer OMP_get_thread_num + integer OMP_GET_MAX_THREADS + integer NUM_THREADS + integer thread_id + + NUM_THREADS = OMP_GET_MAX_THREADS() + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(thread_id) + thread_id = OMP_get_thread_num()+1 + !$OMP END PARALLEL + + end + +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot link a trivial OpenMP program using $FC with flags: $OMP_FCFLAGS +See \`config.log' for more details" "$LINENO" 5; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +FC=$cit_fc_save_fc +FCFLAGS=$cit_fc_save_fcflags + + +ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + + fi ## @@ -8864,10 +8967,18 @@ if test -z "${COND_CUDA11_TRUE}" && test -z "${COND_CUDA11_FALSE}"; then as_fn_error $? "conditional \"COND_CUDA11\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${COND_CUDA12_TRUE}" && test -z "${COND_CUDA12_FALSE}"; then + as_fn_error $? "conditional \"COND_CUDA12\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${COND_CUDA_PLUS_TRUE}" && test -z "${COND_CUDA_PLUS_FALSE}"; then as_fn_error $? "conditional \"COND_CUDA_PLUS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${COND_OMP_TRUE}" && test -z "${COND_OMP_FALSE}"; then + as_fn_error $? "conditional \"COND_OMP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${COND_SCOTCH_TRUE}" && test -z "${COND_SCOTCH_FALSE}"; then as_fn_error $? "conditional \"COND_SCOTCH\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/configure.ac b/configure.ac index 5f422a695..b3c618520 100644 --- a/configure.ac +++ b/configure.ac @@ -90,6 +90,7 @@ AM_CONDITIONAL([COND_CUDA8], [test x"$want_cuda" = xcuda8]) AM_CONDITIONAL([COND_CUDA9], [test x"$want_cuda" = xcuda9]) AM_CONDITIONAL([COND_CUDA10], [test x"$want_cuda" = xcuda10]) AM_CONDITIONAL([COND_CUDA11], [test x"$want_cuda" = xcuda11]) +AM_CONDITIONAL([COND_CUDA12], [test x"$want_cuda" = xcuda12]) # cuda linking for cuda 5x and 6x and 7x and 8x and .. AM_CONDITIONAL([COND_CUDA_PLUS], @@ -100,9 +101,28 @@ AM_CONDITIONAL([COND_CUDA_PLUS], -o "$want_cuda" = cuda8 \ -o "$want_cuda" = cuda9 \ -o "$want_cuda" = cuda10 \ - -o "$want_cuda" = cuda11 \] + -o "$want_cuda" = cuda11 \ + -o "$want_cuda" = cuda12 \] ) +### +### OpenMP +### + +AC_ARG_WITH([openmp], + [AS_HELP_STRING([--with-openmp], + [build OpenMP enabled version @<:@default=no@:>@])], + [with_omp="$withval"], + [with_omp=no]) +AC_ARG_ENABLE([openmp], + [AS_HELP_STRING([--enable-openmp], + [build OpenMP enabled version @<:@default=no@:>@])], + [enable_omp="$enableval"], + [enable_omp=no]) +AS_IF([test x"$enable_omp" != xno || test x"$with_omp" != xno], + [want_omp=yes],[want_omp=no]) +AM_CONDITIONAL([COND_OMP], [test x"$want_omp" != xno]) + ############################################################ # Checks for programs. @@ -142,18 +162,10 @@ AC_FC_FREEFORM() AC_FC_PP_DEFINE() AC_SUBST([FC_DEFINE]) -AC_FC_PP_SRCEXT(F90)dnl Because AC_FC_PP_DEFINE messes with things. +AC_FC_PP_SRCEXT(F90) dnl Because AC_FC_PP_DEFINE messes with things. AC_PROG_CC -AC_LANG_PUSH(C) -AC_CHECK_HEADER(emmintrin.h,AC_DEFINE([HAVE_EMMINTRIN],[1],[Define if emmintrin.h])) -AC_CHECK_HEADER(xmmintrin.h,AC_DEFINE([HAVE_XMMINTRIN],[1],[Define if xmmintrin.h])) -# AIX doesn't have err.h so we need to check -AC_CHECK_HEADER(err.h, AC_DEFINE([HAVE_ERR], [1],[Define if err.h])) -AC_LANG_POP(C) - - ############################################################ #checks for Scotch @@ -235,14 +247,25 @@ if test "x${scotch_usable}" = "xyes"; then else AC_DEFINE([HAVE_SCOTCH],[1],[defined if Scotch is installed]) AC_MSG_RESULT([no, using bundled scotch instead]) - AC_PROG_LEX + + dnl autoconf 2.69 AC_PROG_LEX has no parameters + dnl autoconf 2.70 AC_PROG_LEX prints a deprecation warning without params + m4_if(m4_version_compare(m4_defn([AC_AUTOCONF_VERSION]), [2.70]), [-1], [dnl + dnl autoconf < 2.70 + AC_PROG_LEX + ], [ + dnl autoconf >= 2.70 + AC_PROG_LEX([noyywrap]) + ]) if test -z "$LEX" || test "X$LEX" = "Xno"; then AC_MSG_ERROR([No suitable lex found]) fi + AC_PROG_YACC if test -z "$YACC" || test "X$YACC" = "Xno"; then AC_MSG_ERROR([No suitable yacc or bison found]) fi + ACX_PTHREAD(AC_MSG_RESULT([pthread found]), AC_MSG_ERROR([pthread not found])) # scotch only needed with mpi support @@ -319,6 +342,14 @@ AC_FC_MODULE_OUTPUT_FLAG([ ]) AC_SUBST([FC_MODDIR]) +# Checks for header files. + +AC_LANG_PUSH(C) +AC_CHECK_HEADER(emmintrin.h,AC_DEFINE([HAVE_EMMINTRIN],[1],[Define if emmintrin.h])) +AC_CHECK_HEADER(xmmintrin.h,AC_DEFINE([HAVE_XMMINTRIN],[1],[Define if xmmintrin.h])) +# AIX doesn't have err.h so we need to check +AC_CHECK_HEADER(err.h, AC_DEFINE([HAVE_ERR], [1],[Define if err.h])) +AC_LANG_POP(C) # Checks for libraries. @@ -341,6 +372,22 @@ AS_IF([test x"$want_cuda" != xno], [ CIT_CUDA_CONFIG ]) +### +### OpenMP +### + +AS_IF([test x"$want_omp" != xno], [ + AS_BOX([OpenMP]) + AC_MSG_NOTICE([OpenMP compilation is enabled]) + + AC_ARG_VAR(OMP_FCFLAGS, [OpenMP Fortran compiler flags]) + AC_ARG_VAR(OMP_LIB, [Location of extra OpenMP libraries]) + + # openmp checking + AC_MSG_NOTICE([OpenMP flag $OMP_FCFLAGS]) + CIT_FC_OPENMP_MODULE([$FC], [$OMP_FCFLAGS]) +]) + ## ## Git version info ## @@ -371,12 +418,12 @@ AS_BOX([setting up default simulation setup]) # Output results. AC_CONFIG_FILES([ - Makefile - setup/constants.h - setup/constants_tomography.h - setup/precision.h - setup/config.fh - setup/version.fh + Makefile + setup/constants.h + setup/constants_tomography.h + setup/precision.h + setup/config.fh + setup/version.fh ]) # Scotch makefile diff --git a/flags.guess b/flags.guess index eb18414d1..500698bc8 100644 --- a/flags.guess +++ b/flags.guess @@ -110,6 +110,7 @@ case $my_FC in OPT_FFLAGS="-O3 -finline-functions" DEBUG_FFLAGS="-g -O0 -ggdb -fbacktrace -fbounds-check -frange-check -Werror" # useful to track loss of accuracy because of automatic double to single precision conversion: -Wconversion (this may generate many warnings...) + OMP_FFLAGS="-fopenmp" ;; f90|*/f90) case $host_os in diff --git a/setup/constants.h.in b/setup/constants.h.in index 0310ab7f2..9bd3cdfdc 100644 --- a/setup/constants.h.in +++ b/setup/constants.h.in @@ -121,6 +121,9 @@ ! for viscoacousticity logical, parameter :: USE_A_STRONG_FORMULATION_FOR_E1 = .false. +! maximum attenuation Q-value + double precision, parameter :: ATTENUATION_COMP_MAXIMUM = 9999.d0 + !!----------------------------------------------------------- !! !! noise simulations diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu index b179d2759..4daa9771d 100644 --- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu +++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu @@ -35,7 +35,6 @@ #ifdef USE_TEXTURES_FIELDS - realw_texture d_displ_tex; realw_texture d_accel_tex; // backward/reconstructed @@ -59,7 +58,6 @@ template<> __device__ float texfetch_accel<1>(int x) { return tex1Dfetch(d_accel // FORWARD_OR_ADJOINT == 3 <- backward/reconstructed arrays template<> __device__ float texfetch_displ<3>(int x) { return tex1Dfetch(d_b_displ_tex, x); } template<> __device__ float texfetch_accel<3>(int x) { return tex1Dfetch(d_b_accel_tex, x); } - #endif #ifdef USE_TEXTURES_CONSTANTS diff --git a/src/gpu/mesh_constants_cuda.h b/src/gpu/mesh_constants_cuda.h index d7519c600..0ea00a46c 100644 --- a/src/gpu/mesh_constants_cuda.h +++ b/src/gpu/mesh_constants_cuda.h @@ -153,11 +153,11 @@ //#define USE_TEXTURES_CONSTANTS // might not working properly yet, please test on your card... #ifdef USE_CUDA -// CUDA version >= 4.0 needed for cudaTextureType1D and cudaDeviceSynchronize() -#if CUDA_VERSION < 4000 -#undef USE_TEXTURES_FIELDS -#undef USE_TEXTURES_CONSTANTS -#endif + // CUDA version >= 4.0 needed for cudaTextureType1D and cudaDeviceSynchronize() + #if CUDA_VERSION < 4000 || (defined (__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ < 4)) + #undef USE_TEXTURES_FIELDS + #undef USE_TEXTURES_CONSTANTS + #endif #endif // CUDA compiler specifications @@ -236,11 +236,19 @@ #undef USE_LAUNCH_BOUNDS #endif +#ifdef GPU_DEVICE_Hopper +// specifics see: https://docs.nvidia.com/cuda/hopper-tuning-guide/index.html +// register file size 64k 32-bit registers per SM +// shared memory size 228KB per SM (maximum shared memory, 227KB per thread block) +// maximum registers 255 per thread +#undef USE_LAUNCH_BOUNDS +#endif + /* ----------------------------------------------------------------------------------------------- */ // cuda kernel block size for updating displacements/potential (newmark time scheme) // current hardware: 128 is slightly faster than 256 ( ~ 4%) -#define BLOCKSIZE_KERNEL1 32 +#define BLOCKSIZE_KERNEL1 128 #define BLOCKSIZE_KERNEL3 128 #define BLOCKSIZE_TRANSFER 256 @@ -287,7 +295,12 @@ typedef float realw; // textures +// note: texture templates are supported only for CUDA versions <= 11.x +// since CUDA 12.x, these are deprecated and texture objects should be used instead +// see: https://developer.nvidia.com/blog/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/ +#if defined(USE_TEXTURES_FIELDS) || defined(USE_TEXTURES_CONSTANTS) typedef texture realw_texture; +#endif // pointer declarations // restricted pointers: can improve performance on Kepler ~ 10% diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk index a0bb3c020..20917c27e 100644 --- a/src/gpu/rules.mk +++ b/src/gpu/rules.mk @@ -122,6 +122,9 @@ ifeq ($(CUDA),yes) ifeq ($(CUDA11),yes) BUILD_VERSION_TXT += (v11) endif + ifeq ($(CUDA12),yes) + BUILD_VERSION_TXT += (v12) + endif endif BUILD_VERSION_TXT += support diff --git a/src/gpu/update_displacement_cuda.cu b/src/gpu/update_displacement_cuda.cu index 887025bbf..ba0602017 100644 --- a/src/gpu/update_displacement_cuda.cu +++ b/src/gpu/update_displacement_cuda.cu @@ -231,7 +231,7 @@ void FC_FUNC_(kernel_3_a_cuda, int size = mp->NGLOB_AB; - int blocksize = BLOCKSIZE_KERNEL1; + int blocksize = BLOCKSIZE_KERNEL3; int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; int num_blocks_x, num_blocks_y; @@ -332,7 +332,7 @@ void FC_FUNC_(kernel_3_acoustic_cuda, int size = mp->NGLOB_AB; - int blocksize = BLOCKSIZE_KERNEL1; + int blocksize = BLOCKSIZE_KERNEL3; int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; int num_blocks_x, num_blocks_y; diff --git a/src/shared/init_openmp.F90 b/src/shared/init_openmp.F90 new file mode 100644 index 000000000..b53a57fde --- /dev/null +++ b/src/shared/init_openmp.F90 @@ -0,0 +1,101 @@ +!======================================================================== +! +! S P E C F E M 2 D +! ----------------- +! +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp +! CNRS, France +! and Princeton University, USA +! (there are currently many more authors!) +! (c) October 2017 +! +! This software is a computer program whose purpose is to solve +! the two-dimensional viscoelastic anisotropic or poroelastic wave equation +! using a spectral-element method (SEM). +! +! This program is free software; you can redistribute it and/or modify +! it under the terms of the GNU General Public License as published by +! the Free Software Foundation; either version 3 of the License, or +! (at your option) any later version. +! +! This program is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +! GNU General Public License for more details. +! +! You should have received a copy of the GNU General Public License along +! with this program; if not, write to the Free Software Foundation, Inc., +! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +! +! The full text of the license is available in file "LICENSE". +! +!======================================================================== + + + subroutine init_openmp() + +! outputs OpenMP support info + +#ifdef USE_OPENMP + use constants, only: myrank,IMAIN +#endif + + implicit none + +#ifdef USE_OPENMP + ! local parameters + integer :: thread_id,num_threads + integer :: num_procs,max_threads + logical :: is_dynamic !,is_nested + integer :: max_active_levels + ! OpenMP functions + integer,external :: OMP_GET_NUM_THREADS,OMP_GET_THREAD_NUM + integer,external :: OMP_GET_NUM_PROCS,OMP_GET_MAX_THREADS + logical,external :: OMP_GET_DYNAMIC + + ! note: OMP_GET_NESTED is deprecated (OpenMP version 5.1) + ! instead OMP_GET_MAX_ACTIVE_LEVELS should be used. + integer,external :: OMP_GET_MAX_ACTIVE_LEVELS + +!$OMP PARALLEL DEFAULT(NONE) & +!$OMP SHARED(myrank) & +!$OMP PRIVATE(thread_id,num_threads,num_procs,max_threads,is_dynamic,max_active_levels) + ! gets thread number + thread_id = OMP_GET_THREAD_NUM() + + ! gets total number of threads for this MPI process + num_threads = OMP_GET_NUM_THREADS() + + ! OpenMP main thread only + if (thread_id == 0) then + ! gets additional environment info + num_procs = OMP_GET_NUM_PROCS() + max_threads = OMP_GET_MAX_THREADS() + is_dynamic = OMP_GET_DYNAMIC() + ! is_nested = OMP_GET_NESTED() -- deprecated + max_active_levels = OMP_GET_MAX_ACTIVE_LEVELS() + + ! user output + if (myrank == 0) then + write(IMAIN,*) + write(IMAIN,*) 'OpenMP information:' + write(IMAIN,*) ' number of threads (per MPI process) = ', num_threads + write(IMAIN,*) + write(IMAIN,*) ' number of processors available = ', num_procs + write(IMAIN,*) ' maximum number of threads available = ', num_procs + write(IMAIN,*) ' dynamic thread adjustement = ', is_dynamic + !write(IMAIN,*) ' nested parallelism = ', is_nested + write(IMAIN,*) ' maximum nested active levels = ', max_active_levels + write(IMAIN,*) + call flush_IMAIN() + endif + endif +!$OMP END PARALLEL + +#else + ! nothing to do.. + return +#endif + + end subroutine init_openmp + diff --git a/src/shared/read_material_table.f90 b/src/shared/read_material_table.f90 index da0fb19c5..a162515e5 100644 --- a/src/shared/read_material_table.f90 +++ b/src/shared/read_material_table.f90 @@ -35,7 +35,8 @@ subroutine read_material_table() ! reads in material definitions in DATA/Par_file - use constants, only: IMAIN,TINYVAL,ISOTROPIC_MATERIAL,ANISOTROPIC_MATERIAL,POROELASTIC_MATERIAL + use constants, only: IMAIN,TINYVAL,ISOTROPIC_MATERIAL,ANISOTROPIC_MATERIAL,POROELASTIC_MATERIAL, & + ATTENUATION_COMP_MAXIMUM use shared_parameters, only: AXISYM,nbmodels,icodemat,cp,cs, & aniso3,aniso4,aniso5,aniso6,aniso7,aniso8,aniso9,aniso10,aniso11,aniso12, & @@ -116,8 +117,8 @@ subroutine read_material_table() aniso11(:) = 0.d0 comp_g(:) = 0.0d0 - QKappa(:) = 9999.d0 - Qmu(:) = 9999.d0 + QKappa(:) = ATTENUATION_COMP_MAXIMUM + Qmu(:) = ATTENUATION_COMP_MAXIMUM rho_s_read(:) = 0.d0 rho_f_read(:) = 0.d0 diff --git a/src/shared/rules.mk b/src/shared/rules.mk index be46e8024..17b161a45 100644 --- a/src/shared/rules.mk +++ b/src/shared/rules.mk @@ -48,6 +48,7 @@ shared_OBJECTS = \ $O/exit_mpi.shared.o \ $O/force_ftz.cc.o \ $O/gll_library.shared.o \ + $O/init_openmp.shared.o \ $O/lagrange_poly.shared.o \ $O/parallel.sharedmpi.o \ $O/param_reader.cc.o \ diff --git a/src/specfem2D/compute_forces_viscoelastic.F90 b/src/specfem2D/compute_forces_viscoelastic.F90 index a348c47f8..24ca0ef96 100644 --- a/src/specfem2D/compute_forces_viscoelastic.F90 +++ b/src/specfem2D/compute_forces_viscoelastic.F90 @@ -143,7 +143,30 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic num_elements = nspec_inner_elastic endif +! openmp solver +!$OMP PARALLEL if (num_elements > 100) & +!$OMP DEFAULT(PRIVATE) & +!$OMP SHARED( & +!$OMP num_elements,ibool,ispec_is_elastic,ispec_is_anisotropic, & +!$OMP phase_ispec_inner_elastic,iphase, & +!$OMP displ_elastic,veloc_elastic,accel_elastic, & +!$OMP xix,xiz,gammax,gammaz,jacobian, & +!$OMP rho_vpstore,mustore,rhostore,qkappa_attenuation_store,qmu_attenuation_store, & +!$OMP c11store,c12store,c13store,c15store,c22store,c23store,c25store,c33store,c35store,c55store, & +!$OMP AXISYM,is_on_the_axis,coord,iglob_is_forced, & +!$OMP nglob,P_SV,ATTENUATION_VISCOELASTIC,nspec_ATT_el,N_SLS, & +!$OMP PML_BOUNDARY_CONDITIONS,nspec_PML,ispec_is_PML,ROTATE_PML_ACTIVATE,ROTATE_PML_ANGLE, & +!$OMP displ_elastic_old,dux_dxl_old,duz_dzl_old,dux_dzl_plus_duz_dxl_old,e1,e11,e13 & +!$OMP ) & +!$OMP FIRSTPRIVATE( & +!$OMP hprime_xx,hprimewgll_xx, & +!$OMP hprime_zz,hprimewgll_zz, & +!$OMP hprimeBar_xx,hprimeBarwglj_xx,xiglj,wxglj, & +!$OMP wxgll,wzgll & +!$OMP ) + ! loop over spectral elements +!$OMP DO do ispec_p = 1,num_elements ! returns element id from stored element list @@ -695,9 +718,11 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic tempz2l = tempz2l + tempz2(i,k) * hprimewgll_zz(k,j) enddo ! sums contributions from each element to the global values +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - (wzgll(j) * tempx1l + wxglj(i) * tempx2l) +!$OMP ATOMIC accel_elastic(2,iglob) = accel_elastic(2,iglob) - (wzgll(j) * tempz1l + wxglj(i) * tempz2l) - +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - wzgll(j) * tempx3(i,j) endif enddo @@ -720,9 +745,11 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic tempz2l = tempz2l + tempz2(i,k) * hprimewgll_zz(k,j) enddo ! sums contributions from each element to the global values +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - (wzgll(j) * tempx1l + wxgll(i) * tempx2l) +!$OMP ATOMIC accel_elastic(2,iglob) = accel_elastic(2,iglob) - (wzgll(j) * tempz1l + wxgll(i) * tempz2l) - +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - wzgll(j) * tempx3(i,j) endif enddo @@ -746,7 +773,9 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic tempz2l = tempz2l + tempz2(i,k) * hprimewgll_zz(k,j) enddo ! sums contributions from each element to the global values +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - (wzgll(j) * tempx1l + wxgll(i) * tempx2l) +!$OMP ATOMIC accel_elastic(2,iglob) = accel_elastic(2,iglob) - (wzgll(j) * tempz1l + wxgll(i) * tempz2l) endif enddo @@ -760,7 +789,9 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic do i = 1,NGLLX iglob = ibool(i,j,ispec) if (.not. iglob_is_forced(iglob)) then +!$OMP ATOMIC accel_elastic(1,iglob) = accel_elastic(1,iglob) - accel_elastic_PML(1,i,j) +!$OMP ATOMIC accel_elastic(2,iglob) = accel_elastic(2,iglob) - accel_elastic_PML(2,i,j) endif enddo @@ -769,6 +800,8 @@ subroutine compute_forces_viscoelastic(accel_elastic,veloc_elastic,displ_elastic endif enddo ! end of loop over all spectral elements +!$OMP ENDDO +!$OMP END PARALLEL contains diff --git a/src/specfem2D/define_external_model.f90 b/src/specfem2D/define_external_model.f90 index d00ccdc28..94c3ea6ae 100644 --- a/src/specfem2D/define_external_model.f90 +++ b/src/specfem2D/define_external_model.f90 @@ -35,7 +35,7 @@ subroutine define_external_model_dummy(coord,material_element,ibool, & rho,vp,vs,QKappa_attenuation,Qmu_attenuation, & c11,c12,c13,c15,c23,c25,c33,c35,c55) - use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN + use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN,ATTENUATION_COMP_MAXIMUM use specfem_par, only: myrank,nspec,nglob @@ -75,8 +75,8 @@ subroutine define_external_model_dummy(coord,material_element,ibool, & endif ! default no attenuation - QKappa_attenuation(:,:,:) = 9999.d0 - Qmu_attenuation(:,:,:) = 9999.d0 + QKappa_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM ! loop on all the elements of the mesh, and inside each element loop on all the GLL points do ispec = 1,nspec @@ -92,8 +92,8 @@ subroutine define_external_model_dummy(coord,material_element,ibool, & rho(i,j,ispec) = 2000.d0 + 10.d0*x - 32.17d0*z**3 vp(i,j,ispec) = 3000.d0 - 7.12d0*cos(x) vs(i,j,ispec) = vp(i,j,ispec) / sqrt(3.d0) - QKappa_attenuation(i,j,ispec) = 9999. ! this means no attenuation - Qmu_attenuation(i,j,ispec) = 9999. ! this means no attenuation + QKappa_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM ! this means no attenuation + Qmu_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM ! this means no attenuation c11(i,j,ispec) = 169.d9 c13(i,j,ispec) = 122.d9 c15(i,j,ispec) = 0.d0 @@ -142,7 +142,7 @@ subroutine define_external_model(coord,material_element,ibool, & rho,vp,vs,QKappa_attenuation,Qmu_attenuation, & c11,c12,c13,c15,c23,c25,c33,c35,c55) - use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN + use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN,ATTENUATION_COMP_MAXIMUM use specfem_par, only: myrank,nspec,nglob @@ -214,8 +214,8 @@ subroutine define_external_model(coord,material_element,ibool, & endif ! default no attenuation - QKappa_attenuation(:,:,:) = 9999.d0 - Qmu_attenuation(:,:,:) = 9999.d0 + QKappa_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM ! default no anisotropy c11(:,:,:) = 0.d0 @@ -1118,8 +1118,8 @@ subroutine define_external_model(coord,material_element,ibool, & ! also set fictitious attenuation to a very high value (attenuation is not used in the fluid) if (material_element(ispec) == IREGION_OUTER_CORE) then vs(i,j,ispec) = 0.d0 - Qkappa_attenuation(i,j,ispec) = 9999.d0 - Qmu_attenuation(i,j,ispec) = 9999.d0 + Qkappa_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM endif enddo diff --git a/src/specfem2D/define_external_model_from_marmousi.f90 b/src/specfem2D/define_external_model_from_marmousi.f90 index 716708939..36c239a85 100644 --- a/src/specfem2D/define_external_model_from_marmousi.f90 +++ b/src/specfem2D/define_external_model_from_marmousi.f90 @@ -37,7 +37,7 @@ subroutine define_external_model_from_marmousi(coord,ibool,rho,vp,vs,QKappa_atte ! reads in external model using a marmousi format which defines a compaction gradient - use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN + use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,NDIM,IMAIN,ATTENUATION_COMP_MAXIMUM use specfem_par, only: poroelastcoef,density,kmato,myrank,nspec,nglob @@ -66,8 +66,8 @@ subroutine define_external_model_from_marmousi(coord,ibool,rho,vp,vs,QKappa_atte endif ! default no attenuation - QKappa_attenuation(:,:,:) = 9999.d0 - Qmu_attenuation(:,:,:) = 9999.d0 + QKappa_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuation(:,:,:) = ATTENUATION_COMP_MAXIMUM ! loop on all the elements of the mesh, and inside each element loop on all the GLL points do ispec = 1,nspec @@ -93,8 +93,8 @@ subroutine define_external_model_from_marmousi(coord,ibool,rho,vp,vs,QKappa_atte ! assumes Poisson solids vs(i,j,ispec) = vp(i,j,ispec) / sqrt(3.d0) - QKappa_attenuation(i,j,ispec) = 9999. ! this means no attenuation - Qmu_attenuation(i,j,ispec) = 9999. ! this means no attenuation + QKappa_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM ! this means no attenuation + Qmu_attenuation(i,j,ispec) = ATTENUATION_COMP_MAXIMUM ! this means no attenuation c11(i,j,ispec) = 0.d0 ! this means no anisotropy c13(i,j,ispec) = 0.d0 diff --git a/src/specfem2D/define_external_model_from_tomo_file.f90 b/src/specfem2D/define_external_model_from_tomo_file.f90 index 8bb0e5ea1..6a8e91d3d 100644 --- a/src/specfem2D/define_external_model_from_tomo_file.f90 +++ b/src/specfem2D/define_external_model_from_tomo_file.f90 @@ -50,6 +50,7 @@ module model_tomography_par ! models parameter records double precision, dimension(:), allocatable :: x_tomo,z_tomo double precision, dimension(:,:), allocatable :: vp_tomo,vs_tomo,rho_tomo + double precision, dimension(:,:), allocatable :: qp_tomo,qs_tomo ! models entries integer :: NX,NZ @@ -58,6 +59,9 @@ module model_tomography_par ! min/max statistics double precision :: VP_MIN,VS_MIN,RHO_MIN,VP_MAX,VS_MAX,RHO_MAX + ! (optional) Q values + logical :: has_q_values + end module model_tomography_par ! @@ -345,7 +349,7 @@ subroutine define_external_model_from_tomo_file(rhoext,vpext,vsext, & ! ---------------------------------------------------------------------------------------- use specfem_par, only: tomo_material,coord,nspec,ibool,kmato, & - QKappa_attenuationcoef,Qmu_attenuationcoef,anisotropycoef, & + Qkappa_attenuationcoef,Qmu_attenuationcoef,anisotropycoef, & poroelastcoef,density use specfem_par, only: myrank,TOMOGRAPHY_FILE @@ -353,18 +357,22 @@ subroutine define_external_model_from_tomo_file(rhoext,vpext,vsext, & use model_tomography_par use interpolation - use constants, only: NGLLX,NGLLZ,TINYVAL,IMAIN,CUSTOM_REAL + use constants, only: NGLLX,NGLLZ,TINYVAL,IMAIN,CUSTOM_REAL,ATTENUATION_COMP_MAXIMUM implicit none real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nspec), intent(out) :: rhoext,vpext,vsext - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nspec), intent(out) :: QKappa_attenuationext,Qmu_attenuationext + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nspec), intent(out) :: Qkappa_attenuationext,Qmu_attenuationext real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,nspec), intent(out) :: c11ext,c12ext,c13ext,c15ext,c22ext,c23ext,c25ext, & c33ext,c35ext,c55ext ! local parameters integer :: i,j,ispec,iglob double precision :: xmesh,zmesh + double precision :: rho_final + double precision :: vp_final,vs_final + double precision :: qp_final,qs_final + double precision :: L_val,qmu_atten,qkappa_atten ! stats integer :: npoint_tomo,npoint_internal @@ -378,8 +386,8 @@ subroutine define_external_model_from_tomo_file(rhoext,vpext,vsext, & call read_tomo_file() ! default no attenuation - QKappa_attenuationext(:,:,:) = 9999.d0 - Qmu_attenuationext(:,:,:) = 9999.d0 + QKappa_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM ! default no anisotropy c11ext(:,:,:) = 0.d0 @@ -405,9 +413,50 @@ subroutine define_external_model_from_tomo_file(rhoext,vpext,vsext, & xmesh = coord(1,iglob) zmesh = coord(2,iglob) - rhoext(i,j,ispec) = interpolate(NX, x_tomo, NZ, z_tomo, rho_tomo, xmesh, zmesh,TINYVAL) - vpext(i,j,ispec) = interpolate(NX, x_tomo, NZ, z_tomo, vp_tomo, xmesh, zmesh,TINYVAL) - vsext(i,j,ispec) = interpolate(NX, x_tomo, NZ, z_tomo, vs_tomo, xmesh, zmesh,TINYVAL) + rho_final = interpolate(NX, x_tomo, NZ, z_tomo, rho_tomo, xmesh, zmesh, TINYVAL) + vp_final = interpolate(NX, x_tomo, NZ, z_tomo, vp_tomo, xmesh, zmesh, TINYVAL) + vs_final = interpolate(NX, x_tomo, NZ, z_tomo, vs_tomo, xmesh, zmesh, TINYVAL) + + rhoext(i,j,ispec) = rho_final + vpext(i,j,ispec) = vp_final + vsext(i,j,ispec) = vs_final + + if (has_q_values) then + qp_final = interpolate(NX, x_tomo, NZ, z_tomo, qp_tomo, xmesh, zmesh, TINYVAL) + qs_final = interpolate(NX, x_tomo, NZ, z_tomo, qs_tomo, xmesh, zmesh, TINYVAL) + + ! attenuation zero (means negligible attenuation) + if (qs_final <= TINYVAL) qs_final = ATTENUATION_COMP_MAXIMUM + if (qp_final <= TINYVAL) qp_final = ATTENUATION_COMP_MAXIMUM + + ! Anderson & Hart (1978) conversion between (Qp,Qs) and (Qkappa,Qmu) + ! factor L + L_val = 4.d0/3.d0 * (vs_final/vp_final)**2 + + ! shear attenuation + qmu_atten = qs_final + + ! converts to bulk attenuation + if (abs(qs_final - L_val * qp_final) <= TINYVAL) then + ! negligible bulk attenuation + qkappa_atten = ATTENUATION_COMP_MAXIMUM + else + qkappa_atten = (1.d0 - L_val) * qp_final * qs_final / (qs_final - L_val * qp_final) + endif + + ! attenuation zero (means negligible attenuation) + if (qmu_atten <= TINYVAL) qmu_atten = ATTENUATION_COMP_MAXIMUM + if (qkappa_atten <= TINYVAL) qkappa_atten = ATTENUATION_COMP_MAXIMUM + + ! limits Q values + if (qmu_atten < 1.0d0) qmu_atten = 1.0d0 + if (qmu_atten > ATTENUATION_COMP_MAXIMUM) qmu_atten = ATTENUATION_COMP_MAXIMUM + if (qkappa_atten < 1.0d0) qkappa_atten = 1.0d0 + if (qkappa_atten > ATTENUATION_COMP_MAXIMUM) qkappa_atten = ATTENUATION_COMP_MAXIMUM + + Qkappa_attenuationext(i,j,ispec) = qkappa_atten + Qmu_attenuationext(i,j,ispec) = qmu_atten + endif !! ABAB : The 3 following lines are important, otherwise PMLs won't work. TODO check that !! (we assign these values several times: indeed for each kmato(ispec) it can exist a lot of rhoext(i,j,ispec) ) @@ -514,6 +563,11 @@ subroutine read_tomo_file() ! ... ! x(NX) z(NZ) vp vs rho ! +! +! in case Q-values are also provided, the data lines here would have a format like: +! x(1) z(1) vp vs rho Qp Qs +! .. + ! Where : ! _x and z must be increasing ! _ORIGIN_X, END_X are, respectively, the coordinates of the initial and final tomographic @@ -538,16 +592,23 @@ subroutine read_tomo_file() use specfem_par, only: myrank,TOMOGRAPHY_FILE use model_tomography_par - use constants, only: IIN,IMAIN,HUGEVAL + use constants, only: IIN,IMAIN,HUGEVAL,MAX_STRING_LEN implicit none ! local parameters integer :: ier,irecord,i,j - character(len=150) :: string_read + character(len=MAX_STRING_LEN) :: string_read double precision, dimension(:), allocatable :: x_tomography,z_tomography,vp_tomography,vs_tomography,rho_tomography - double precision :: read_rho_min,read_rho_max,read_vp_min,read_vp_max,read_vs_min,read_vs_max,tmp_dp + double precision, dimension(:), allocatable :: qp_tomography,qs_tomography + double precision :: read_rho_min,read_rho_max + double precision :: read_vp_min,read_vp_max + double precision :: read_vs_min,read_vs_max + double precision :: read_qp_min,read_qp_max + double precision :: read_qs_min,read_qs_max + double precision :: tmp_dp + integer :: ntokens ! opens file for reading open(unit=IIN,file=trim(TOMOGRAPHY_FILE),status='old',action='read',iostat=ier) @@ -611,11 +672,60 @@ subroutine read_tomo_file() read_vs_max = - HUGEVAL read_rho_min = + HUGEVAL read_rho_max = - HUGEVAL + read_qp_min = + HUGEVAL + read_qp_max = - HUGEVAL + read_qs_min = + HUGEVAL + read_qs_max = - HUGEVAL do while (irecord < nrecord) call tomo_read_next_line(IIN,string_read) - read(string_read,*) x_tomography(irecord+1),z_tomography(irecord+1), & - vp_tomography(irecord+1),vs_tomography(irecord+1),rho_tomography(irecord+1) + + if (irecord == 0) then + ! checks number of entries of first data line + call tomo_get_number_of_tokens(string_read,ntokens) + + !debug + !print *,'tomography file: number of tokens on first data line: ',ntokens,' line: ***'//trim(string_read)//'***' + + ! data line formats: + ! #x(1) #z(1) #vp #vs #rho + ! or + ! #x(1) #z(1) #vp #vs #rho #Qp #Qs + if (ntokens /= 5 .and. ntokens /= 7) then + print *,'Error reading tomography file, data line has wrong number of entries: ',trim(string_read) + stop 'Error reading tomography file' + endif + + ! determines data format + if (ntokens == 7) then + has_q_values = .true. + else + has_q_values = .false. + endif + + if (has_q_values) then + ! allocate models parameter records + allocate(qp_tomography(nrecord),qs_tomography(nrecord),stat=ier) + if (ier /= 0) call exit_MPI(myrank,'not enough memory to allocate tomo Q arrays') + qp_tomography(:) = 0.d0; qs_tomography(:) = 0.d0 + + allocate(qp_tomo(NX,NZ),qs_tomo(NX,NZ),stat=ier) + if (ier /= 0) call exit_MPI(myrank,'not enough memory to allocate tomo arrays') + qp_tomo(:,:) = 0.d0; qs_tomo(:,:) = 0.d0 + endif + endif + + ! reads in tomo values + if (has_q_values) then + ! #x(1) #z(1) #vp #vs #rho #Qp #Qs + read(string_read,*) x_tomography(irecord+1),z_tomography(irecord+1), & + vp_tomography(irecord+1),vs_tomography(irecord+1),rho_tomography(irecord+1), & + qp_tomography(irecord+1),qs_tomography(irecord+1) + else + ! #x(1) #z(1) #vp #vs #rho + read(string_read,*) x_tomography(irecord+1),z_tomography(irecord+1), & + vp_tomography(irecord+1),vs_tomography(irecord+1),rho_tomography(irecord+1) + endif if (irecord < NX) x_tomo(irecord+1) = x_tomography(irecord+1) @@ -627,6 +737,13 @@ subroutine read_tomo_file() if (rho_tomography(irecord+1) > read_rho_max) read_rho_max = rho_tomography(irecord+1) if (rho_tomography(irecord+1) < read_rho_min) read_rho_min = rho_tomography(irecord+1) + if (has_q_values) then + if (qp_tomography(irecord+1) > read_qp_max) read_qp_max = qp_tomography(irecord+1) + if (qp_tomography(irecord+1) < read_qp_min) read_qp_min = qp_tomography(irecord+1) + if (qs_tomography(irecord+1) > read_qs_max) read_qs_max = qs_tomography(irecord+1) + if (qs_tomography(irecord+1) < read_qs_min) read_qs_min = qs_tomography(irecord+1) + endif + ! counter irecord = irecord + 1 enddo @@ -641,6 +758,15 @@ subroutine read_tomo_file() enddo enddo + if (has_q_values) then + do i = 1,NX + do j = 1,NZ + qp_tomo(i,j) = qp_tomography(NX*(j-1)+i) + qs_tomo(i,j) = qs_tomography(NX*(j-1)+i) + enddo + enddo + endif + call synchronize_all() if (irecord /= nrecord .and. myrank == 0) then @@ -666,6 +792,19 @@ subroutine read_tomo_file() tmp_dp = read_rho_max call max_all_dp(tmp_dp,read_rho_max) + if (has_q_values) then + ! Q-model values + tmp_dp = read_qp_min + call min_all_dp(tmp_dp,read_qp_min) + tmp_dp = read_qp_max + call max_all_dp(tmp_dp,read_qp_max) + + tmp_dp = read_qs_min + call min_all_dp(tmp_dp,read_qs_min) + tmp_dp = read_qs_max + call max_all_dp(tmp_dp,read_qs_max) + endif + ! user output if (myrank == 0) then write(IMAIN,*) ' Number of grid points = NX*NZ:',nrecord @@ -674,12 +813,20 @@ subroutine read_tomo_file() write(IMAIN,*) ' vs : min/max = ',sngl(read_vs_min),' / ',sngl(read_vs_max) write(IMAIN,*) ' rho: min/max = ',sngl(read_rho_min),' / ',sngl(read_rho_max) write(IMAIN,*) + if (has_q_values) then + write(IMAIN,*) ' Qp : min/max = ',sngl(read_qp_min),' / ',sngl(read_qp_max) + write(IMAIN,*) ' Qs : min/max = ',sngl(read_qs_min),' / ',sngl(read_qs_max) + write(IMAIN,*) + endif call flush_IMAIN() endif ! closes file close(IIN) deallocate(x_tomography,z_tomography,vp_tomography,vs_tomography,rho_tomography) + if (has_q_values) then + deallocate(qp_tomography,qs_tomography) + endif end subroutine read_tomo_file @@ -691,10 +838,11 @@ subroutine tomo_read_next_line(unit_in,string_read) ! Store next line of file unit_in in string_read + use constants, only: MAX_STRING_LEN implicit none integer :: unit_in - character(len=150) :: string_read + character(len=MAX_STRING_LEN) :: string_read integer :: ier @@ -708,8 +856,10 @@ subroutine tomo_read_next_line(unit_in,string_read) ! suppress trailing carriage return (ASCII code 13) if any (e.g. if input text file coming from Windows/DOS) if (index(string_read,achar(13)) > 0) string_read = string_read(1:index(string_read,achar(13))-1) - ! exit loop when we find the first line that is not a comment or a white line + ! reads next line if empty if (len_trim(string_read) == 0) cycle + + ! exit loop when we find the first line that is not a comment or a white line if (string_read(1:1) /= '#') exit enddo @@ -725,3 +875,44 @@ subroutine tomo_read_next_line(unit_in,string_read) end subroutine tomo_read_next_line +! +!------------------------------------------------------------------------------------------------- +! + + subroutine tomo_get_number_of_tokens(string_read,ntokens) + + use constants, only: MAX_STRING_LEN + implicit none + + character(len=MAX_STRING_LEN),intent(in) :: string_read + integer,intent(out) :: ntokens + + ! local parameters + integer :: i + logical :: previous_is_delim + character(len=1), parameter :: delim_space = ' ' + character(len=1), parameter :: delim_tab = achar(9) ! tab delimiter + + ! initializes + ntokens = 0 + + ! checks if anything to do + if (len_trim(string_read) == 0) return + + ! counts tokens + ntokens = 1 + previous_is_delim = .true. + do i = 1, len_trim(string_read) + ! finds next delimiter (space or tabular) + if (string_read(i:i) == delim_space .or. string_read(i:i) == delim_tab) then + if (.not. previous_is_delim) then + ntokens = ntokens + 1 + previous_is_delim = .true. + endif + else + if (previous_is_delim) previous_is_delim = .false. + endif + enddo + + end subroutine tomo_get_number_of_tokens + diff --git a/src/specfem2D/initialize_simulation.F90 b/src/specfem2D/initialize_simulation.F90 index ba3ab3d57..3645c5cea 100644 --- a/src/specfem2D/initialize_simulation.F90 +++ b/src/specfem2D/initialize_simulation.F90 @@ -143,6 +143,9 @@ subroutine initialize_simulation() ! initializes GPU cards if (GPU_MODE) call initialize_GPU() + ! output info for possible OpenMP + call init_openmp() + ! ----------- initialization and determining parameters ! allocates mesh ! local to global indexing diff --git a/src/specfem2D/read_external_model.f90 b/src/specfem2D/read_external_model.f90 index d2af0df9e..383227b44 100644 --- a/src/specfem2D/read_external_model.f90 +++ b/src/specfem2D/read_external_model.f90 @@ -36,7 +36,8 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte ! reads in external model files - use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,TINYVAL,IMAIN,IN_DATA_FILES,IIN,MAX_STRING_LEN + use constants, only: CUSTOM_REAL,NGLLX,NGLLZ,TINYVAL,IMAIN,IN_DATA_FILES,IIN,MAX_STRING_LEN, & + ATTENUATION_COMP_MAXIMUM use specfem_par, only: nspec,ibool,ispec_is_elastic,ispec_is_anisotropic, & coord,kmato,MODEL,myrank,setup_with_binary_database @@ -135,8 +136,8 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte close(IIN) ! default no attenuation - QKappa_attenuationext(:,:,:) = 9999.d0 - Qmu_attenuationext(:,:,:) = 9999.d0 + QKappa_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM case ('ascii') ! ascii model format @@ -159,8 +160,8 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte enddo close(IIN) ! default no attenuation - QKappa_attenuationext(:,:,:) = 9999.d0 - Qmu_attenuationext(:,:,:) = 9999.d0 + QKappa_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM case ('binary','gll') ! binary formats @@ -205,7 +206,7 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte read(IIN) QKappa_attenuationext close(IIN) - Qmu_attenuationext(:,:,:) = 9999.d0 + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM ! user output if (myrank == 0) then @@ -240,8 +241,8 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte else ! default no attenuation - QKappa_attenuationext(:,:,:) = 9999.d0 - Qmu_attenuationext(:,:,:) = 9999.d0 + QKappa_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM endif @@ -326,8 +327,8 @@ subroutine read_external_model(rhoext,vpext,vsext,QKappa_attenuationext,Qmu_atte enddo ! default no attenuation - QKappa_attenuationext(:,:,:) = 9999.d0 - Qmu_attenuationext(:,:,:) = 9999.d0 + QKappa_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationext(:,:,:) = ATTENUATION_COMP_MAXIMUM case ('external') ! generic model defined in external files diff --git a/src/specfem2D/read_materials.f90 b/src/specfem2D/read_materials.f90 index adc24ad49..2316f407d 100644 --- a/src/specfem2D/read_materials.f90 +++ b/src/specfem2D/read_materials.f90 @@ -36,7 +36,8 @@ subroutine read_materials(f0) ! reads properties of a 2D isotropic or anisotropic linear elastic element use constants, only: IIN,IMAIN,ZERO,FOUR_THIRDS,TWO_THIRDS,HALF,TINYVAL, & - ISOTROPIC_MATERIAL,ANISOTROPIC_MATERIAL,POROELASTIC_MATERIAL + ISOTROPIC_MATERIAL,ANISOTROPIC_MATERIAL,POROELASTIC_MATERIAL, & + ATTENUATION_COMP_MAXIMUM use specfem_par, only: AXISYM,density,porosity,tortuosity,anisotropycoef,permeability,poroelastcoef, & numat,myrank,QKappa_attenuationcoef,Qmu_attenuationcoef, & @@ -72,8 +73,8 @@ subroutine read_materials(f0) poroelastcoef(:,:,:) = ZERO anisotropycoef(:,:) = ZERO - QKappa_attenuationcoef(:) = 9999. - Qmu_attenuationcoef(:) = 9999. + QKappa_attenuationcoef(:) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationcoef(:) = ATTENUATION_COMP_MAXIMUM ! Index of the material that will be defined by an external tomo file if needed (TOMOGRAPHY_FILE) tomo_material = 0 @@ -97,8 +98,8 @@ subroutine read_materials(f0) c23 = ZERO c25 = ZERO c22 = ZERO - Qkappa = 9999. - Qmu = 9999. + Qkappa = ATTENUATION_COMP_MAXIMUM + Qmu = ATTENUATION_COMP_MAXIMUM ! supported model formats: ! acoustic - model_number 1 rho Vp 0 0 0 QKappa Qmu 0 0 0 0 0 0 @@ -393,8 +394,8 @@ subroutine read_materials(f0) poroelastcoef(3,1,n) = -1.0d0 poroelastcoef(4,1,n) = ZERO - QKappa_attenuationcoef(n) = 9999. - Qmu_attenuationcoef(n) = 9999. + QKappa_attenuationcoef(n) = ATTENUATION_COMP_MAXIMUM + Qmu_attenuationcoef(n) = ATTENUATION_COMP_MAXIMUM if (mu > TINYVAL) then porosity(n) = 0.d0 diff --git a/src/specfem2D/rules.mk b/src/specfem2D/rules.mk index cd2e4ac3d..1a34baf59 100644 --- a/src/specfem2D/rules.mk +++ b/src/specfem2D/rules.mk @@ -195,6 +195,7 @@ specfem2D_SHARED_OBJECTS = \ $O/exit_mpi.shared.o \ $O/force_ftz.cc.o \ $O/gll_library.shared.o \ + $O/init_openmp.shared.o \ $O/lagrange_poly.shared.o \ $O/parallel.sharedmpi.o \ $O/read_parameter_file.shared.o \ diff --git a/src/specfem2D/save_model_files.f90 b/src/specfem2D/save_model_files.f90 index 19d3f1f23..849c4956c 100644 --- a/src/specfem2D/save_model_files.f90 +++ b/src/specfem2D/save_model_files.f90 @@ -88,6 +88,7 @@ subroutine save_model_files() endif if (ATTENUATION_VISCOELASTIC) then + ! safety stop if (ATTENUATION_VISCOACOUSTIC) & call exit_MPI(myrank,'Not possible yet to save model with both acoustic and elastic attenuation')