diff --git a/.github/workflows/continuous-integration-workflow-32bit.yml b/.github/workflows/continuous-integration-workflow-32bit.yml new file mode 100644 index 0000000000..7fab3b0e62 --- /dev/null +++ b/.github/workflows/continuous-integration-workflow-32bit.yml @@ -0,0 +1,37 @@ +name: github-Linux-32bit +on: [push, pull_request] + +concurrency: + group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + CI-32bit: + name: Linux-32bit + runs-on: ubuntu-latest + container: + image: ghcr.io/kokkos/ci-containers/ubuntu:latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: install_multilib + run: sudo apt-get update && sudo apt-get install -y gcc-multilib g++-multilib gfortran-multilib + - name: Configure Kokkos + run: | + cmake -B builddir \ + -DKokkos_ENABLE_OPENMP=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DKokkos_ENABLE_EXAMPLES=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DCMAKE_CXX_FLAGS="-Werror -m32 -DKOKKOS_IMPL_32BIT" \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo + - name: Build + run: | + cmake --build builddir --parallel 2 + - name: Tests + working-directory: builddir + run: ctest --output-on-failure diff --git a/.github/workflows/continuous-integration-workflow-hpx.yml b/.github/workflows/continuous-integration-workflow-hpx.yml index 35bb5bb2cb..0c7abd2fc1 100644 --- a/.github/workflows/continuous-integration-workflow-hpx.yml +++ b/.github/workflows/continuous-integration-workflow-hpx.yml @@ -13,7 +13,7 @@ jobs: steps: - name: checkout code - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: path: kokkos - name: setup hpx dependencies @@ -26,12 +26,12 @@ jobs: libboost-all-dev \ ninja-build - name: checkout hpx - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: repository: STELLAR-GROUP/hpx - ref: 1.7.1 + ref: 1.8.0 path: hpx - - uses: actions/cache@v2 + - uses: actions/cache@v3 id: cache-hpx with: path: ./hpx/install @@ -69,12 +69,10 @@ jobs: -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Werror" \ -DHPX_ROOT=$PWD/../../hpx/install \ - -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_HPX=ON \ - -DKokkos_ENABLE_HPX_ASYNC_DISPATCH=ON \ -DKokkos_ENABLE_SERIAL=OFF \ -DKokkos_ENABLE_TESTS=ON \ .. diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index 55b8817948..741f6b2746 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -12,30 +12,31 @@ jobs: matrix: distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] cxx: ['g++', 'clang++'] + cxx_extra_flags: [''] cmake_build_type: ['Release', 'Debug'] backend: ['OPENMP'] clang-tidy: [''] include: - distro: 'fedora:intel' cxx: 'icpc' + cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Release' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpc' + cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Debug' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' + cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Release' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' + cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Debug' backend: 'OPENMP' - clang-tidy: '' - distro: 'ubuntu:latest' cxx: 'clang++' cmake_build_type: 'RelWithDebInfo' @@ -48,11 +49,9 @@ jobs: runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} - # see https://github.com/actions/virtual-environments/issues/3812 - options: --security-opt seccomp=unconfined steps: - name: Checkout desul - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: repository: desul/desul ref: 477da9c8f40f8db369c28dd3f93a67e376d8511b @@ -67,21 +66,17 @@ jobs: cmake -DDESUL_ENABLE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr/desul-install .. sudo cmake --build . --target install --parallel 2 - name: Checkout code - uses: actions/checkout@v2.2.0 - - uses: actions/cache@v2 + uses: actions/checkout@v3 + - uses: actions/cache@v3 with: - path: ~/.ccache - key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${github.ref}-${{ github.sha }} - restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{github.ref}} + path: ~/.cache/ccache + key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }} - name: maybe_disable_death_tests if: ${{ matrix.distro == 'fedora:rawhide' }} run: echo "GTEST_FILTER=-*DeathTest*" >> $GITHUB_ENV -# Re-enable when latest is F37+ -# - name: maybe_use_flang -# if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} -# run: echo "FC=flang" >> $GITHUB_ENV - name: maybe_use_flang_new - if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:rawhide') }} + if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} run: echo "FC=flang-new" >> $GITHUB_ENV - name: maybe_use_external_gtest if: ${{ matrix.distro == 'ubuntu:latest' }} @@ -95,7 +90,6 @@ jobs: -DCMAKE_INSTALL_PREFIX=/usr \ ${{ matrix.clang-tidy }} \ -Ddesul_ROOT=/usr/desul-install/ \ - -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_DESUL_ATOMICS_EXTERNAL=ON \ -DKokkos_ENABLE_HWLOC=ON \ -DKokkos_ENABLE_${{ matrix.backend }}=ON \ @@ -104,7 +98,10 @@ jobs: -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DCMAKE_CXX_FLAGS="-Werror ${{ matrix.cxx_extra_flags }}" \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} - name: Build run: | @@ -114,6 +111,12 @@ jobs: - name: Tests working-directory: builddir run: ctest --output-on-failure + - name: Test linking against build dir + working-directory: example/build_cmake_installed + run: | + cmake -B builddir_buildtree -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} -DKokkos_ROOT=../../builddir + cmake --build builddir_buildtree + cmake --build builddir_buildtree --target test - name: Test DESTDIR Install run: DESTDIR=${PWD}/install cmake --build builddir --target install && rm -rf ${PWD}/install/usr && rmdir ${PWD}/install - name: Install diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index dae8343f20..0ff3266848 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -24,14 +24,13 @@ jobs: cmake_build_type: "Release" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: configure run: cmake -B build . -DKokkos_ENABLE_${{ matrix.backend }}=On -DCMAKE_CXX_FLAGS="-Werror" -DCMAKE_CXX_STANDARD=17 - -DKokkos_ARCH_NATIVE=ON -DKokkos_ENABLE_COMPILER_WARNINGS=ON -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF -DKokkos_ENABLE_TESTS=On diff --git a/.github/workflows/performance-benchmark.yml b/.github/workflows/performance-benchmark.yml new file mode 100644 index 0000000000..205239e043 --- /dev/null +++ b/.github/workflows/performance-benchmark.yml @@ -0,0 +1,61 @@ +name: github-benchmarks +on: + push: + branches: + - develop + pull_request: + +jobs: + CI: + continue-on-error: true + strategy: + matrix: + distro: ['ubuntu:latest'] + cxx: ['g++', 'clang++'] + backend: ['OPENMP'] + runs-on: ubuntu-latest + container: + image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} + env: + BUILD_ID: ${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + - uses: actions/cache@v3 + with: + path: ~/.cache/ccache + key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }} + - name: Configure Kokkos + run: | + cmake -B builddir \ + -DKokkos_ENABLE_HWLOC=ON \ + -DKokkos_ENABLE_${{ matrix.backend }}=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_BUILD_TYPE=Release + - name: Build + run: | + ccache -z + NUM_CPU=$(grep -c processor /proc/cpuinfo) + cmake --build builddir --parallel ${NUM_CPU} + ccache -s + - name: Tests + working-directory: builddir + run: ctest --output-on-failure + - name: Gather benchmark results + run: | + mkdir ${{ env.BUILD_ID }} + find builddir/core/perf_test/ -name "*.json" -exec mv {} ${{ env.BUILD_ID }}/ \; + - name: Push benchmark results + if: ${{ github.ref == 'refs/heads/develop' }} + uses: dmnemec/copy_file_to_another_repo_action@main + env: + API_TOKEN_GITHUB: ${{ secrets.DALG24_PUSH_BENCHMARK_RESULTS }} + with: + source_file: ${{ env.BUILD_ID }} + destination_repo: 'kokkos/kokkos-benchmark-results' + destination_branch: 'main' + user_email: 'kokkos@users.noreply.github.com' + user_name: 'Kokkos Developers' diff --git a/.jenkins b/.jenkins index 1775a57d3b..c7d8ce533d 100644 --- a/.jenkins +++ b/.jenkins @@ -17,7 +17,7 @@ pipeline { dockerfile { filename 'Dockerfile.clang' dir 'scripts/docker' - label 'nvidia-docker || docker' + label 'nvidia-docker || rocm-docker || docker' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } @@ -101,12 +101,14 @@ pipeline { } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''. /opt/intel/oneapi/setvars.sh --include-intel-llvm && \ + rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-linker-warnings" \ + -DCMAKE_CXX_COMPILER=/opt/intel/oneapi/compiler/2023.0.0/linux/bin-llvm/clang++ \ + -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ + -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ARCH_VOLTA70=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ @@ -343,7 +345,7 @@ pipeline { --with-cuda \ --with-cuda-options=enable_lambda \ --arch=Volta70 \ - .. && \ + && \ make test -j8''' } post { @@ -487,6 +489,7 @@ pipeline { -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ diff --git a/.jenkins_nightly b/.jenkins_nightly new file mode 100644 index 0000000000..8bcdb75a2a --- /dev/null +++ b/.jenkins_nightly @@ -0,0 +1,76 @@ +pipeline { + agent none + + options { + timeout(time: 6, unit: 'HOURS') + } + + stages { + stage('Build') { + parallel { + stage('spack-serial') { + agent { + docker { + image 'ubuntu:22.04' + label 'docker' + } + } + steps { + sh ''' + DEBIAN_FRONTEND=noninteractive && \ + apt-get update && apt-get upgrade -y && apt-get install -y \ + build-essential \ + wget \ + git \ + bc \ + python3-dev \ + && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + + rm -rf spack && \ + git clone https://github.com/spack/spack.git && \ + . ./spack/share/spack/setup-env.sh && \ + spack install kokkos@develop+tests && \ + spack load cmake && \ + spack test run kokkos && \ + spack test results -l + ''' + } + } + stage('spack-cuda') { + agent { + docker { + image 'nvidia/cuda:12.1.0-devel-ubuntu22.04' + label 'nvidia-docker && ampere' + } + } + steps { + sh ''' + DEBIAN_FRONTEND=noninteractive && \ + apt-get update && apt-get upgrade -y && apt-get install -y \ + build-essential \ + wget \ + git \ + bc \ + python3-dev \ + gfortran \ + && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + + rm -rf spack && \ + git clone https://github.com/spack/spack.git && \ + . ./spack/share/spack/setup-env.sh && \ + spack install kokkos@develop+cuda+wrapper+tests cuda_arch=80 ^cuda@12.1.0 && \ + spack load cmake && \ + spack load kokkos-nvcc-wrapper && \ + spack load cuda && \ + spack load kokkos && \ + spack test run kokkos && \ + spack test results -l + ''' + } + } + } + } + } +} diff --git a/BUILD.md b/BUILD.md index b0d603e6db..f80320e78b 100644 --- a/BUILD.md +++ b/BUILD.md @@ -111,247 +111,4 @@ For dev-build details, consult the kokkos-spack repository [README](https://gith # Kokkos Keyword Listing -## Device Backends -Device backends can be enabled by specifying `-DKokkos_ENABLE_X`. - -* Kokkos_ENABLE_CUDA - * Whether to build CUDA backend - * BOOL Default: OFF -* Kokkos_ENABLE_HPX - * Whether to build HPX backend (experimental) - * BOOL Default: OFF -* Kokkos_ENABLE_OPENMP - * Whether to build OpenMP backend - * BOOL Default: OFF -* Kokkos_ENABLE_THREADS - * Whether to build C++ thread backend - * BOOL Default: OFF -* Kokkos_ENABLE_SERIAL - * Whether to build serial backend - * BOOL Default: ON -* Kokkos_ENABLE_HIP (Experimental) - * Whether to build HIP backend - * BOOL Default: OFF -* Kokkos_ENABLE_OPENMPTARGET (Experimental) - * Whether to build the OpenMP target backend - * BOOL Default: OFF - -## Enable Options -Options can be enabled by specifying `-DKokkos_ENABLE_X`. - -* Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION - * Whether to aggressively vectorize loops - * BOOL Default: OFF -* Kokkos_ENABLE_COMPILER_WARNINGS - * Whether to print all compiler warnings - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_CONSTEXPR - * Whether to activate experimental relaxed constexpr functions - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_LAMBDA - * Whether to activate experimental lambda features - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_LDG_INTRINSIC - * Deprecated since 4.0, LDG intrinsics are always enabled. - * Whether to use CUDA LDG intrinsics - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - * Whether to enable relocatable device code (RDC) for CUDA - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_UVM - * Deprecated since 4.0 - * Whether to use unified memory (UM) by default for CUDA - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG - * Whether to activate extra debug features - may increase compile times - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG_BOUNDS_CHECK - * Whether to use bounds checking - will increase runtime - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK - * Debug check on dual views - * BOOL Default: OFF -* Kokkos_ENABLE_EXAMPLES - * Whether to enable building examples - * BOOL Default: OFF -* Kokkos_ENABLE_HPX_ASYNC_DISPATCH - * Whether HPX supports asynchronous dispatch - * BOOL Default: OFF -* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC - * Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issue when using with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details. - * BOOL Default: OFF -* Kokkos_ENABLE_LARGE_MEM_TESTS - * Whether to perform extra large memory tests - * BOOL_Default: OFF -* Kokkos_ENABLE_PROFILING_LOAD_PRINT - * Whether to print information about which profiling tools gotloaded - * BOOL Default: OFF -* Kokkos_ENABLE_TESTS - * Whether to enable test suite - * BOOL Default: OFF - - -## Third-party Libraries (TPLs) -The following options control enabling TPLs: -* Kokkos_ENABLE_HPX - * Whether to enable the HPX library - * BOOL Default: OFF -* Kokkos_ENABLE_HWLOC - * Whether to enable the HWLOC library - * BOOL Default: Off -* Kokkos_ENABLE_LIBNUMA - * Whether to enable the LIBNUMA library - * BOOL Default: Off -* Kokkos_ENABLE_MEMKIND - * Whether to enable the MEMKIND library - * BOOL Default: Off -* Kokkos_ENABLE_LIBDL - * Whether to enable the LIBDL library - * BOOL Default: On -* Kokkos_ENABLE_LIBRT - * Whether to enable the LIBRT library - * BOOL Default: Off - -The following options control finding and configuring non-CMake TPLs: -* Kokkos_CUDA_DIR or CUDA_ROOT - * Location of CUDA install prefix for libraries - * PATH Default: -* Kokkos_HWLOC_DIR or HWLOC_ROOT - * Location of HWLOC install prefix - * PATH Default: -* Kokkos_LIBNUMA_DIR or LIBNUMA_ROOT - * Location of LIBNUMA install prefix - * PATH Default: -* Kokkos_MEMKIND_DIR or MEMKIND_ROOT - * Location of MEMKIND install prefix - * PATH Default: -* Kokkos_LIBDL_DIR or LIBDL_ROOT - * Location of LIBDL install prefix - * PATH Default: -* Kokkos_LIBRT_DIR or LIBRT_ROOT - * Location of LIBRT install prefix - * PATH Default: - -The following options control `find_package` paths for CMake-based TPLs: -* HPX_DIR or HPX_ROOT - * Location of HPX prefix (ROOT) or CMake config file (DIR) - * PATH Default: - -## Architecture Keywords -Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. - -* Kokkos_ARCH_NATIVE - * Whether to optimize for the the local CPU architecture - * BOOL Default: OFF -* Kokkos_ARCH_AMDAVX - * Whether to optimize for the AMDAVX architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV80 - * Whether to optimize for the ARMV80 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV81 - * Whether to optimize for the ARMV81 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV8_THUNDERX - * Whether to optimize for the ARMV8_THUNDERX architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV8_TX2 - * Whether to optimize for the ARMV8_TX2 architecture - * BOOL Default: OFF -* Kokkos_ARCH_BDW - * Whether to optimize for the BDW architecture - * BOOL Default: OFF -* Kokkos_ARCH_BGQ - * Whether to optimize for the BGQ architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN - * Whether to optimize for the Zen architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN2 - * Whether to optimize for the Zen2 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN3 - * Whether to optimize for the Zen3 architecture - * BOOL Default: OFF -* Kokkos_ARCH_HSW - * Whether to optimize for the HSW architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER30 - * Whether to optimize for the KEPLER30 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER32 - * Whether to optimize for the KEPLER32 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER35 - * Whether to optimize for the KEPLER35 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER37 - * Whether to optimize for the KEPLER37 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KNC - * Whether to optimize for the KNC architecture - * BOOL Default: OFF -* Kokkos_ARCH_KNL - * Whether to optimize for the KNL architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL50 - * Whether to optimize for the MAXWELL50 architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL52 - * Whether to optimize for the MAXWELL52 architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL53 - * Whether to optimize for the MAXWELL53 architecture - * BOOL Default: OFF -* Kokkos_ARCH_PASCAL60 - * Whether to optimize for the PASCAL60 architecture - * BOOL Default: OFF -* Kokkos_ARCH_PASCAL61 - * Whether to optimize for the PASCAL61 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER7 - * Whether to optimize for the POWER7 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER8 - * Whether to optimize for the POWER8 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER9 - * Whether to optimize for the POWER9 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ICL - * Whether to optimize for the ICL architecture - * BOOL Default: OFF -* Kokkos_ARCH_ICX - * Whether to optimize for the ICX architecture - * BOOL Default: OFF -* Kokkos_ARCH_SKL - * Whether to optimize for the SKL architecture - * BOOL Default: OFF -* Kokkos_ARCH_SKX - * Whether to optimize for the SKX architecture - * BOOL Default: OFF -* Kokkos_ARCH_SNB - * Whether to optimize for the SNB architecture - * BOOL Default: OFF -* Kokkos_ARCH_SPR - * Whether to optimize for the SPR architecture - * BOOL Default: OFF -* Kokkos_ARCH_TURING75 - * Whether to optimize for the TURING75 architecture - * BOOL Default: OFF -* Kokkos_ARCH_VOLTA70 - * Whether to optimize for the VOLTA70 architecture - * BOOL Default: OFF -* Kokkos_ARCH_VOLTA72 - * Whether to optimize for the VOLTA72 architecture - * BOOL Default: OFF -* Kokkos_ARCH_WSM - * Whether to optimize for the WSM architecture - * BOOL Default: OFF - -##### [LICENSE](https://github.com/kokkos/kokkos/blob/devel/LICENSE) - -[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) - -Under the terms of Contract DE-NA0003525 with NTESS, -the U.S. Government retains certain rights in this software. +Please refer to our [wiki](https://kokkos.github.io/kokkos-core-wiki/keywords.html#cmake-keywords). diff --git a/CHANGELOG.md b/CHANGELOG.md index c3409a9058..4c145c44b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,129 @@ -# Change Log +# CHANGELOG -## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.0) (2023-02-21) -[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.0) +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) + +### Features: +* Add `` header [\#4577](https://github.com/kokkos/kokkos/pull/4577) [\#5907](https://github.com/kokkos/kokkos/pull/5907) [\#5967](https://github.com/kokkos/kokkos/pull/5967) [\#6101](https://github.com/kokkos/kokkos/pull/6101) +* Add `UnorderedMapInsertOpTypes` [\#5877](https://github.com/kokkos/kokkos/pull/5877) and documentation [\#350](https://github.com/kokkos/kokkos-core-wiki/pull/350) +* Add multiple reducers support for team-level parallel reduce [\#5727](https://github.com/kokkos/kokkos/pull/5727) + +### Backend and Architecture Enhancements: + +#### CUDA: + +* Allow NVCC 12 to compile using C++20 flag [\#5977](https://github.com/kokkos/kokkos/pull/5977) +* Remove ability to disable CMake option `Kokkos_ENABLE_CUDA_LAMBDA` and unconditionally enable CUDA extended lambda support. [\#5964](https://github.com/kokkos/kokkos/pull/5964) +* Drop unnecessary fences around the memory allocation when using `CudaUVMSpace` in views [\#6008](https://github.com/kokkos/kokkos/pull/6008) + +#### HIP: +* Improve performance for `parallel_reduce`. Use different parameters for `LightWeight` kernels [\#6029](https://github.com/kokkos/kokkos/pull/6029) and [\#6160](https://github.com/kokkos/kokkos/pull/6160) + +#### SYCL: +* Only pass one wrapper object in SYCL reductions [\#6047](https://github.com/kokkos/kokkos/pull/6047) +* Improve and simplify parallel_scan implementation [\#6064](https://github.com/kokkos/kokkos/pull/6064) +* Remove workaround for submit_barrier not being enqueued properly [\#5504](https://github.com/kokkos/kokkos/pull/5504) +* Fix guards for using scratch space with SYCL [\#6003](https://github.com/kokkos/kokkos/pull/6003) +* Fix compiling SYCL with KOKKOS_IMPL_DO_NOT_USE_PRINTF_USAGE [\#6219](https://github.com/kokkos/kokkos/pull/6219) + +#### OpenMPTarget: +* Improve hierarchical parallelism for Intel architectures [\#6043](https://github.com/kokkos/kokkos/pull/6043) +* Enable Cray compiler for the OpenMPTarget backend. [\#5889](https://github.com/kokkos/kokkos/pull/5889) + +#### HPX: +* Update HPX backend to use HPX's sender/receiver functionality [\#5628](https://github.com/kokkos/kokkos/pull/5628) +* Increase minimum required HPX version to 1.8.0 [\#6132](https://github.com/kokkos/kokkos/pull/6132) +* Implement HPX::in_parallel [\#6143](https://github.com/kokkos/kokkos/pull/6143) + +### General Enhancements +* Export CMake `Kokkos_{CUDA,HIP}_ARCHITECTURES` variables [\#5919](https://github.com/kokkos/kokkos/pull/5919) [\#5925](https://github.com/kokkos/kokkos/pull/5925) +* Add `Kokkos::Profiling::ScopedRegion` [\#5959](https://github.com/kokkos/kokkos/pull/5959) [\#5972](https://github.com/kokkos/kokkos/pull/5972) +* Add support for `View::rank[_dynamic]()`[\#5870](https://github.com/kokkos/kokkos/pull/5870) +* Detect incompatible relocatable device code mode to prevent ODR violations [\#5991](https://github.com/kokkos/kokkos/pull/5991) +* Add (experimental) support for 32-bit Darwin and PPC [\#5916](https://github.com/kokkos/kokkos/pull/5916) +* Add missing half and bhalf specialization of the infinity numeric trait [\#6055](https://github.com/kokkos/kokkos/pull/6055) +* Add `is_dual_view` trait and align further with regular view [\#6120](https://github.com/kokkos/kokkos/pull/6120) +* Allow templated functors in parallel_for, parallel_reduce and parallel_scan [\#5976](https://github.com/kokkos/kokkos/pull/5976) +* Define KOKKOS_COMPILER_INTEL_LLVM and only define at most one KOKKOS_COMPILER* macro [\#5906](https://github.com/kokkos/kokkos/pull/5906) +* Allow linking against build tree [\#6078](https://github.com/kokkos/kokkos/pull/6078) +* Allow passing a temporary std::vector to partition_space [\#6167](https://github.com/kokkos/kokkos/pull/6167) +* `Kokkos` can be used as an external dependency in `Trilinos` [\#6142](https://github.com/kokkos/kokkos/pull/6142), [\#6157](https://github.com/kokkos/kokkos/pull/6157) [\#6163](https://github.com/kokkos/kokkos/pull/6163) +* Left align demangled stacktrace output [\#6191](https://github.com/kokkos/kokkos/pull/6191) +* Improve OpenMP affinity warning to include MPI concerns [\#6185](https://github.com/kokkos/kokkos/pull/6185) + +### Build System Changes +* Drop `Kokkos_ENABLE_LAUNCH_COMPILER` option which had no effect [\#6148](https://github.com/kokkos/kokkos/pull/6148) +* Export variables for relevant Kokkos options with cmake[\#6142](https://github.com/kokkos/kokkos/pull/6142) + +### Incompatibilities (i.e. breaking changes) +* Desul atomics always enabled [\#5801](https://github.com/kokkos/kokkos/pull/5801) +* Drop `KOKKOS_ENABLE_CUDA_ASM*` and `KOKKOS_ENABLE_*_ATOMICS` macros [\#5940](https://github.com/kokkos/kokkos/pull/5940) +* Drop `KOKKOS_ENABLE_RFO_PREFETCH` macro [\#5944](https://github.com/kokkos/kokkos/pull/5944) +* Deprecate `Kokkos_ENABLE_CUDA_LAMBDA` configuration option and force it to `ON` [\#5964](https://github.com/kokkos/kokkos/pull/5964) +* Remove TriBITS Kokkos subpackages [\#6104](https://github.com/kokkos/kokkos/pull/6104) +* Cuda: Remove unused attach_texture_object [\#6129](https://github.com/kokkos/kokkos/pull/6129) +* Drop Kokkos_ENABLE_PROFILING_LOAD_PRINT configuration option [\#6150](https://github.com/kokkos/kokkos/pull/6150) +* Drop pointless Kokkos{Algorithms,Containers}_config.h files [\#6108](https://github.com/kokkos/kokkos/pull/6108) + +### Deprecations +* Deprecate `BinSort`, `BinOp1D`, and `BinOp3D` default constructors [\#6131](https://github.com/kokkos/kokkos/pull/6131) + +### Bug Fixes +* Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5981](https://github.com/kokkos/kokkos/pull/5981) +* Fix Kokkos_SIMD with AVX2 on 64-bit architectures [\#6075](https://github.com/kokkos/kokkos/pull/6075) +* Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6004](https://github.com/kokkos/kokkos/pull/6004) +* Fix missing avx512 header file with gcc versions before 10 [\#6183](https://github.com/kokkos/kokkos/pull/6183) +* Fix incorrect results of `parallel_reduce` of types smaller than `int` on CUDA and HIP: [\#5745](https://github.com/kokkos/kokkos/pull/5745) +* CMake: update package compatibility mode when building within Trilinos [\#6012](https://github.com/kokkos/kokkos/pull/6012) +* Fix warnings generated from internal uses of `ALL_t` rather than `Kokkos::ALL_t` [\#6028](https://github.com/kokkos/kokkos/pull/6028) +* Fix bug in `hpcbind` script: check for correct Slurm variable [\#6116](https://github.com/kokkos/kokkos/pull/6116) +* KokkosTools: Don't call callbacks before backends are initialized [\#6114](https://github.com/kokkos/kokkos/pull/6114) +* Fix global fence in Kokkos::resize(DynRankView) [\#6184](https://github.com/kokkos/kokkos/pull/6184) +* Fix `BinSort` support for strided views [\#6081](https://github.com/kokkos/kokkos/pull/6184) +* Fix missing `is_*_view` traits in containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +* Fix broken OpenMP target on NVHPC [\#6171](https://github.com/kokkos/kokkos/pull/6171) +* Sorting an empty view should exit early and not fail [\#6130](https://github.com/kokkos/kokkos/pull/6130) + +## [4.0.01](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.0.01) + +### Backend and Architecture Enhancements: + +#### CUDA: + +- Allow NVCC 12 to compile using C++20 flag [\#6020](https://github.com/kokkos/kokkos/pull/6020) +- Add CUDA Ada architecture support [\#6022](https://github.com/kokkos/kokkos/pull/6022) + +#### HIP: + +- Add support for AMDGPU target NAVI31 / RX 7900 XT(X): gfx1100 [\#6021](https://github.com/kokkos/kokkos/pull/6021) +- HIP: Fix warning from `std::memcpy` [\#6019](https://github.com/kokkos/kokkos/pull/6019) + +#### SYCL: +- Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5986](https://github.com/kokkos/kokkos/pull/5986) + +### General Enhancements +- Fixup 4.0 change log [\#6023](https://github.com/kokkos/kokkos/pull/6023) + +### Build System Changes +- Cherry-pick TriBITS update from Trilinos [\#6037](https://github.com/kokkos/kokkos/pull/6037) +- CMake: update package compatibility mode when building within Trilinos [\#6013](https://github.com/kokkos/kokkos/pull/6013) + +### Bug Fixes +- Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6011](https://github.com/kokkos/kokkos/pull/6011) +- Desul atomics: wrong value for `desul::Impl::numeric_limits_max` [\#6018](https://github.com/kokkos/kokkos/pull/6018) +- Fix warning in some user code when using std::memcpy [\#6000](https://github.com/kokkos/kokkos/pull/6000) +- Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) + +## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.00) (2023-02-21) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.00) ### Features: - Allow value types without default constructor in `Kokkos::View` with `Kokkos::WithoutInitializing` [\#5307](https://github.com/kokkos/kokkos/pull/5307) - `parallel_scan` with `View` as result type. [\#5146](https://github.com/kokkos/kokkos/pull/5146) - Introduced `SharedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is moved and then accessed locally. [\#5289](https://github.com/kokkos/kokkos/pull/5289) - Introduced `SharedHostPinnedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is pinned to the host and accessed via zero-copy access. [\#5405](https://github.com/kokkos/kokkos/pull/5405) +- Add team- and thread-level `sort`, `sort_by_key` algorithms. [\#5317](https://github.com/kokkos/kokkos/pull/5317) - Groundwork for `MDSpan` integration. [\#4973](https://github.com/kokkos/kokkos/pull/4973) and [\#5304](https://github.com/kokkos/kokkos/pull/5304) - Introduced MD version of hierarchical parallelism: `TeamThreadMDRange`, `ThreadVectorMDRange` and `TeamVectorMDRange`. [\#5238](https://github.com/kokkos/kokkos/pull/5238) @@ -72,23 +188,12 @@ - Remove Kokkos_ENABLE_CUDA_LDG_INTRINSIC option [\#5623](https://github.com/kokkos/kokkos/pull/5623) - Don't rely on synchronization behavior of default stream in CUDA and HIP - this potentially will break unintended implicit synchronization with other libraries such as MPI [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Make ExecutionSpace::concurrency() a non-static member function [\#5655](https://github.com/kokkos/kokkos/pull/5655) and related PRs +- Remove code guarded by `KOKKOS_ENABLE_DEPRECATED_CODE_3` ### Deprecations -- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) -- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) -- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141) -- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) -- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) -- Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134) -- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) -- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) -- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) -- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) -- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) -- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) - Deprecate `CudaUVMSpace::available()` which always returned `true` [\#5614](https://github.com/kokkos/kokkos/pull/5614) - Deprecate `volatile`-qualified members from `Kokkos::pair` and `Kokkos::complex` [\#5412](https://github.com/kokkos/kokkos/pull/5412) -- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.2) +- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.6) ### Bug Fixes - Avoid allocating memory for `UniqueToken` [\#5300](https://github.com/kokkos/kokkos/pull/5300) @@ -103,6 +208,27 @@ - Don't install standard algorithms headers multiple times [\#5670](https://github.com/kokkos/kokkos/pull/5670) - Fix max scratch size calculation for level 0 scratch in CUDA and HIP [\#5718](https://github.com/kokkos/kokkos/pull/5718) +## [3.7.02](https://github.com/kokkos/kokkos/tree/3.7.02) (2023-05-17) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...3.7.02) + +### Backends and Archs Enhancements: +#### CUDA +- Add Hopper support and update nvcc_wrapper to work with CUDA-12 [\#5693](https://github.com/kokkos/kokkos/pull/5693) +### General Enhancements: +- sprintf -> snprintf [\#5787](https://github.com/kokkos/kokkos/pull/5787) +### Build System: +- Add error message when not using `hipcc` and when `CMAKE_CXX_STANDARD` is not set [\#5945](https://github.com/kokkos/kokkos/pull/5945) +### Bug Fixes: +- Fix Scratch allocation alignment issues [\#5692](https://github.com/kokkos/kokkos/pull/5692) +- Fix Intel Classic Compiler ICE [\#5710](https://github.com/kokkos/kokkos/pull/5710) +- Don't install std algorithm headers multiple times [\#5711](https://github.com/kokkos/kokkos/pull/5711) +- Fix static init order issue in InitalizationSettings [\#5721](https://github.com/kokkos/kokkos/pull/5721) +- Fix src/dst Properties in deep_copy(DynamicView,View) [\#5732](https://github.com/kokkos/kokkos/pull/5732) +- Fix build on Fedora Rawhide [\#5782](https://github.com/kokkos/kokkos/pull/5782) +- Finalize HIP lock arrays [\#5694](https://github.com/kokkos/kokkos/pull/5694) +- Fix CUDA lock arrays for current Desul [\#5812](https://github.com/kokkos/kokkos/pull/5812) +- Set the correct device/context in InterOp tests [\#5701](https://github.com/kokkos/kokkos/pull/5701) + ## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01) diff --git a/CMakeLists.txt b/CMakeLists.txt index 02ebcf9e24..895cee6a08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,16 @@ if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() +if (COMMAND TRIBITS_PACKAGE) + TRIBITS_PACKAGE(Kokkos) +endif() + # We want to determine if options are given with the wrong case # In order to detect which arguments are given to compare against # the list of valid arguments, at the beginning here we need to # form a list of all the given variables. If it begins with any # case of KoKkOS, we add it to the list. - GET_CMAKE_PROPERTY(_variableNames VARIABLES) SET(KOKKOS_GIVEN_VARIABLES) FOREACH (var ${_variableNames}) @@ -34,6 +37,8 @@ IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON) ELSE() SET(KOKKOS_HAS_TRILINOS OFF) + SET(PACKAGE_NAME Kokkos) + SET(PACKAGE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") ENDIF() # Is this build a subdirectory of another project GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) @@ -123,6 +128,8 @@ IF(NOT KOKKOS_HAS_TRILINOS) FORCE) ENDIF() ENDIF() +ELSE() + SET(KOKKOS_COMPILE_LANGUAGE CXX) ENDIF() IF (NOT CMAKE_SIZEOF_VOID_P) @@ -133,14 +140,20 @@ IF (NOT CMAKE_SIZEOF_VOID_P) MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation") ENDIF() ELSEIF (NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build; i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead") + IF(CMAKE_SIZEOF_VOID_P EQUAL 4) + MESSAGE(WARNING "32-bit builds are experimental and not officially supported.") + SET(KOKKOS_IMPL_32BIT ON) + ELSE() + MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build, i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead;") + ENDIF() ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 0) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_MINOR 1) +set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") +message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") # mathematical expressions below are not stricly necessary but they eliminate # the rather aggravating leading 0 in the releases patch version number, and, @@ -288,8 +301,6 @@ IF (KOKKOS_HAS_TRILINOS) $<$:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() -KOKKOS_PACKAGE_DECL() - #------------------------------------------------------------------------------ # @@ -303,7 +314,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_PACKAGE_DEF() KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() @@ -313,6 +323,8 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) #Make sure in-tree projects can reference this as Kokkos:: #to match the installed target names ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) + # all_libs target is required for TriBITS-compliance + ADD_LIBRARY(Kokkos::all_libs ALIAS kokkos) TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_COMPONENT_LIBRARIES}) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDIF() diff --git a/Makefile.kokkos b/Makefile.kokkos index a55e3428cf..9436b75b9e 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -1,8 +1,8 @@ # Default settings common options. KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 0 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_MINOR = 1 +KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -10,7 +10,7 @@ KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MIN KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Hopper90 +# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 @@ -23,7 +23,7 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++17,c++1z,c++20,c++2a,c++23,c++2b KOKKOS_CXX_STANDARD ?= "c++17" -# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,disable_desul_atomics +# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings KOKKOS_OPTIONS ?= "" KOKKOS_CMAKE ?= "no" KOKKOS_TRIBITS ?= "no" @@ -75,7 +75,6 @@ KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OP KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning) KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) -KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) # deprecated KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) @@ -86,6 +85,7 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) +# deprecated KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics) KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),impl_disable_bundled_mdspan) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) @@ -265,15 +265,16 @@ else KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp endif endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping - KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget -else ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping -else - #Assume GCC - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping + else ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -mp=gpu + else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 0) + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) @@ -341,6 +342,7 @@ KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volt KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75) KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80) KOKKOS_INTERNAL_USE_ARCH_AMPERE86 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere86) +KOKKOS_INTERNAL_USE_ARCH_ADA89 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ada89) KOKKOS_INTERNAL_USE_ARCH_HOPPER90 := $(call kokkos_has_string,$(KOKKOS_ARCH),Hopper90) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ @@ -356,6 +358,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE86) \ + + $(KOKKOS_INTERNAL_USE_ARCH_ADA89) \ + $(KOKKOS_INTERNAL_USE_ARCH_HOPPER90)) #SEK: This seems like a bug to me @@ -574,10 +577,6 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN") endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT") -endif - ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING") endif @@ -666,15 +665,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - KOKKOS_CXXFLAGS += -expt-extended-lambda - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") + KOKKOS_CXXFLAGS += -extended-lambda + endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1) @@ -695,7 +692,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH") endif endif @@ -971,134 +968,144 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp + else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp --offload-arch endif - KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 endif -ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 +# Do not add this flag if its the cray compiler or the nvhpc compiler. +ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) + # Lets start with adding architecture defines + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 + endif endif +endif - ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) +ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - endif endif endif # Figure out the architecture flag for ROCm. -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 - endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 +endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) - ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp - endif KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) @@ -1171,12 +1178,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") - KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include -else ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) - $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) +ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 1) + $(warning disable_desul_atomics option has been removed. Desul atomics cannot be disabled.) + KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := 0 +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) + $(warning enable_desul_atomics option has been removed. Desul atomics are always enabled.) endif +KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/mdspan/include @@ -1218,6 +1227,7 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") @@ -1229,8 +1239,8 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - tmp := $(call kokkos_append_config_header,"\#include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include ","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") @@ -1261,9 +1271,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp - endif + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include @@ -1379,11 +1387,7 @@ KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS} # Generating the header DESUL_INTERNAL_CONFIG_TMP=Desul_Config.tmp -ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - DESUL_CONFIG_HEADER=desul/atomics/Config.hpp -else - DESUL_CONFIG_HEADER=NothingToSeeHereMoveAlong -endif +DESUL_CONFIG_HEADER=desul/atomics/Config.hpp desul_append_header = $(shell echo $1 >> $(DESUL_INTERNAL_CONFIG_TMP)) tmp := $(call desul_append_header, "// generated by on-demand build system by crtrott" > $(DESUL_INTERNAL_CONFIG_TMP)) tmp := $(call desul_append_header, "$H""ifndef DESUL_ATOMICS_CONFIG_HPP_") @@ -1394,12 +1398,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) else tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA */") endif +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION") +else + tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION */") +endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP") else tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP */") endif +ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) + tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION") +else + tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION */") +endif ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_SYCL") @@ -1418,7 +1432,7 @@ tmp := $(call desul_append_header, "$H""endif") DESUL_INTERNAL_LS_CONFIG := $(shell ls $(DESUL_CONFIG_HEADER) 2>&1) ifeq ($(DESUL_INTERNAL_LS_CONFIG), $(DESUL_CONFIG_HEADER)) - KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define)) + DESUL_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define)) else DESUL_INTERNAL_NEW_CONFIG := 1 endif diff --git a/Makefile.targets b/Makefile.targets index 32b1fab261..4e08a46c69 100644 --- a/Makefile.targets +++ b/Makefile.targets @@ -51,8 +51,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp -Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif @@ -77,8 +75,6 @@ Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp -Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif @@ -89,6 +85,8 @@ Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokk endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) +Kokkos_OpenMP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp diff --git a/algorithms/CMakeLists.txt b/algorithms/CMakeLists.txt index f32363dc9a..ab557ab66a 100644 --- a/algorithms/CMakeLists.txt +++ b/algorithms/CMakeLists.txt @@ -1,7 +1,3 @@ - - -KOKKOS_SUBPACKAGE(Algorithms) - IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() @@ -9,7 +5,3 @@ ENDIF() IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() - -KOKKOS_SUBPACKAGE_POSTPROCESS() - - diff --git a/algorithms/cmake/Dependencies.cmake b/algorithms/cmake/Dependencies.cmake deleted file mode 100644 index c36b62523f..0000000000 --- a/algorithms/cmake/Dependencies.cmake +++ /dev/null @@ -1,5 +0,0 @@ -TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC - TEST_OPTIONAL_TPLS CUSPARSE - ) diff --git a/algorithms/cmake/KokkosAlgorithms_config.h.in b/algorithms/cmake/KokkosAlgorithms_config.h.in deleted file mode 100644 index 67334b70f3..0000000000 --- a/algorithms/cmake/KokkosAlgorithms_config.h.in +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef KOKKOS_ALGORITHMS_CONFIG_H -#define KOKKOS_ALGORITHMS_CONFIG_H - -#endif diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index 606d83d18b..1695778947 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -1,6 +1,3 @@ - -KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) - #I have to leave these here for tribits KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -9,7 +6,6 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) -LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) diff --git a/algorithms/src/Kokkos_Random.hpp b/algorithms/src/Kokkos_Random.hpp index 91e9ce6fc8..abb028d28e 100644 --- a/algorithms/src/Kokkos_Random.hpp +++ b/algorithms/src/Kokkos_Random.hpp @@ -1514,7 +1514,7 @@ void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, "Kokkos::fill_random", Kokkos::RangePolicy(exec, 0, (LDA + 127) / 128), Impl::fill_random_functor_begin_end( + ViewType::rank, IndexType>( a, g, begin, end)); } diff --git a/algorithms/src/Kokkos_Sort.hpp b/algorithms/src/Kokkos_Sort.hpp index 033de22164..10f9ad6462 100644 --- a/algorithms/src/Kokkos_Sort.hpp +++ b/algorithms/src/Kokkos_Sort.hpp @@ -66,11 +66,16 @@ #endif +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + namespace Kokkos { namespace Impl { -template +template struct CopyOp; template @@ -141,8 +146,12 @@ class BinSort { Kokkos::is_view::value, Kokkos::View >, + typename SrcViewType::device_type +#if !defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + , + Kokkos::MemoryTraits +#endif + >, typename SrcViewType::const_type>; using perm_view_type = typename PermuteViewType::const_type; @@ -221,7 +230,11 @@ class BinSort { bool sort_within_bins; public: - BinSort() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif //---------------------------------------- // Constructor: takes the keys, the binning_operator and optionally whether to @@ -324,6 +337,10 @@ class BinSort { template void sort(const ExecutionSpace& exec, ValuesViewType const& values, int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + static_assert( Kokkos::SpaceAccessibility::accessible, @@ -335,11 +352,6 @@ class BinSort { "The provided execution space must be able to access the memory space " "of the View argument!"); - using scratch_view_type = - Kokkos::View; - const size_t len = range_end - range_begin; const size_t values_len = values_range_end - values_range_begin; if (len != values_len) { @@ -347,6 +359,9 @@ class BinSort { "BinSort::sort: values range length != permutation vector length"); } + using scratch_view_type = + Kokkos::View; scratch_view_type sorted_values( view_alloc(exec, WithoutInitializing, "Kokkos::SortImpl::BinSortFunctor::sorted_values"), @@ -451,24 +466,29 @@ class BinSort { void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; - int upper_bound = bin_offsets(i) + bin_size; - bool sorted = false; - while (!sorted) { - sorted = true; - int old_idx = sort_order(bin_offsets(i)); - int new_idx = 0; - for (int k = bin_offsets(i) + 1; k < upper_bound; k++) { - new_idx = sort_order(k); - - if (!bin_op(keys_rnd, old_idx, new_idx)) { - sort_order(k - 1) = new_idx; - sort_order(k) = old_idx; - sorted = false; - } else { - old_idx = new_idx; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + if constexpr (use_std_sort) { + std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); }); + } + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; } + sort_order(j + 1) = old_idx; } - upper_bound--; } } }; @@ -481,7 +501,11 @@ struct BinOp1D { double mul_ = {}; double min_ = {}; - BinOp1D() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif // Construct BinOp with number of bins, minimum value and maximum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, @@ -525,7 +549,11 @@ struct BinOp3D { double mul_[3] = {}; double min_[3] = {}; - BinOp3D() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[]) { @@ -596,6 +624,10 @@ std::enable_if_t<(Kokkos::is_execution_space::value) && memory_space>::accessible)> sort(const ExecutionSpace& exec, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } + using ViewType = Kokkos::View; using CompType = BinOp1D; @@ -634,12 +666,44 @@ sort(const ExecutionSpace& exec, bin_sort.sort(exec, view); } +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort(const Experimental::SYCL& space, + const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } + + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + static_assert( + ViewType::rank == 1 && + (std::is_same::value || + std::is_same::value), + "SYCL sort only supports contiguous 1D Views."); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n); +} +#endif + template std::enable_if_t<(Kokkos::is_execution_space::value) && (SpaceAccessibility< HostSpace, typename Kokkos::View:: memory_space>::accessible)> sort(const ExecutionSpace&, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } auto first = Experimental::begin(view); auto last = Experimental::end(view); std::sort(first, last); @@ -649,6 +713,9 @@ sort(const ExecutionSpace&, const Kokkos::View& view) { template void sort(const Cuda& space, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } const auto exec = thrust::cuda::par.on(space.cuda_stream()); auto first = Experimental::begin(view); auto last = Experimental::end(view); @@ -659,6 +726,11 @@ void sort(const Cuda& space, template void sort(ViewType const& view) { Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) == 0) { + return; + } + typename ViewType::execution_space exec; sort(exec, view); exec.fence("Kokkos::sort: fence after sorting"); @@ -668,6 +740,10 @@ template std::enable_if_t::value> sort( const ExecutionSpace& exec, ViewType view, size_t const begin, size_t const end) { + if (view.extent(0) == 0) { + return; + } + using range_policy = Kokkos::RangePolicy; using CompType = BinOp1D; @@ -690,6 +766,11 @@ std::enable_if_t::value> sort( template void sort(ViewType view, size_t const begin, size_t const end) { Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) == 0) { + return; + } + typename ViewType::execution_space exec; sort(exec, view, begin, end); exec.fence("Kokkos::Sort: fence after sorting"); diff --git a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index cc6b63f028..dd785e603b 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -42,12 +42,13 @@ struct StdAdjacentFindFunctor { const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); - auto rv = - are_equal - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type value = {::Kokkos::reduction_identity::min()}; + if (are_equal) { + value.min_loc_true = i; + } - m_reducer.join(red_value, rv); + m_reducer.join(red_value, value); } KOKKOS_FUNCTION diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 52e7625e4d..0376100410 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -29,7 +29,7 @@ struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< - T, std::enable_if_t< ::Kokkos::is_view::value && T::rank == 1 && + T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && (std::is_same::value || std::is_same::max()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::max()}; + if (found) { + rv.max_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index df10da2fd5..5f22d2ad13 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -52,10 +52,11 @@ struct StdFindFirstOfFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; - + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index f7ec4b1110..9c0b0c0ccd 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -44,10 +44,11 @@ struct StdFindIfOrNotFunctor { // if doing find_if_not, look for when predicate is false const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value); - auto rv = - found_condition - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found_condition) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index 55e1a78695..ecd6ff39cd 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -90,6 +90,8 @@ struct InclusiveScanDefaultFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 92a22f3c3a..0fe2d246ff 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -43,8 +43,12 @@ struct StdIsPartitionedFunctor { ::Kokkos::reduction_identity::min(); constexpr index_type m_red_id_max = ::Kokkos::reduction_identity::max(); - auto rv = predicate_value ? red_value_type{i, m_red_id_min} - : red_value_type{m_red_id_max, i}; + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {m_red_id_max, i}; + if (predicate_value) { + rv = {i, m_red_id_min}; + } m_reducer.join(redValue, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index fe52e18a33..2a0c112bf5 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -28,33 +28,30 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdIsSortedUntilFunctor { using index_type = typename IteratorType::difference_type; + using value_type = typename ReducerType::value_type; + IteratorType m_first; - IndicatorViewType m_indicator; ComparatorType m_comparator; + ReducerType m_reducer; KOKKOS_FUNCTION - void operator()(const index_type i, int& update, const bool final) const { + void operator()(const index_type i, value_type& reduction_result) const { const auto& val_i = m_first[i]; const auto& val_ip1 = m_first[i + 1]; - if (m_comparator(val_ip1, val_i)) { - ++update; - } - - if (final) { - m_indicator(i) = update; + m_reducer.join(reduction_result, i); } } KOKKOS_FUNCTION - StdIsSortedUntilFunctor(IteratorType _first1, IndicatorViewType indicator, - ComparatorType comparator) - : m_first(std::move(_first1)), - m_indicator(std::move(indicator)), - m_comparator(std::move(comparator)) {} + StdIsSortedUntilFunctor(IteratorType first, ComparatorType comparator, + ReducerType reducer) + : m_first(std::move(first)), + m_comparator(std::move(comparator)), + m_reducer(std::move(reducer)) {} }; template @@ -73,40 +70,31 @@ IteratorType is_sorted_until_impl(const std::string& label, } /* - use scan and a helper "indicator" view - such that we scan the data and fill the indicator with - partial sum that is always 0 unless we find a pair that - breaks the sorting, so in that case the indicator will - have a 1 starting at the location where the sorting breaks. - So finding that 1 means finding the location we want. - */ - - // aliases - using indicator_value_type = std::size_t; - using indicator_view_type = - ::Kokkos::View; - using functor_type = - StdIsSortedUntilFunctor; - - // do scan - // use num_elements-1 because each index handles i and i+1 - const auto num_elements_minus_one = num_elements - 1; - indicator_view_type indicator("is_sorted_until_indicator_helper", - num_elements_minus_one); - ::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements_minus_one), - functor_type(first, indicator, std::move(comp))); - - // try to find the first sentinel value, which indicates - // where the sorting condition breaks - namespace KE = ::Kokkos::Experimental; - constexpr indicator_value_type sentinel_value = 1; - auto r = - KE::find(ex, KE::cbegin(indicator), KE::cend(indicator), sentinel_value); - const auto shift = r - ::Kokkos::Experimental::cbegin(indicator); - - return first + (shift + 1); + Do a par_reduce computing the *min* index that breaks the sorting. + If such an index is found, then the range is sorted until that element. + If no such index is found, then the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type reduction_result; + ::Kokkos::Min reducer(reduction_result); + ::Kokkos::parallel_reduce( + label, + // use num_elements-1 because each index handles i and i+1 + RangePolicy(ex, 0, num_elements - 1), + // use CTAD + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + + /* If the reduction result is equal to the initial value, + it means the range is sorted until the end */ + index_type reduction_result_init; + reducer.init(reduction_result_init); + if (reduction_result == reduction_result_init) { + return last; + } else { + /* If such an index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (reduction_result + 1); + } } template diff --git a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index 170ec9f291..ad7f59232e 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -63,12 +63,14 @@ struct StdLexicographicalCompareFunctor { const auto& my_value1 = m_first1[i]; const auto& my_value2 = m_first2[i]; - bool different = m_comparator(my_value1, my_value2) || - m_comparator(my_value2, my_value1); - auto rv = - different - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + const bool different = m_comparator(my_value1, my_value2) || + m_comparator(my_value2, my_value1); + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (different) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp index 9d2e31f63f..b742684467 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp @@ -42,10 +42,11 @@ struct StdMismatchRedFunctor { const auto& my_value1 = m_first1[i]; const auto& my_value2 = m_first2[i]; - auto rv = - !m_predicate(my_value1, my_value2) - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {i}; + if (m_predicate(my_value1, my_value2)) { + rv = {::Kokkos::reduction_identity::min()}; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp b/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp index 5457ae2508..54f7c5b612 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp @@ -31,25 +31,6 @@ template struct StdPartitionCopyScalar { ValueType true_count_; ValueType false_count_; - - // Here we implement the copy assignment operators explicitly for consistency - // with how the Scalar structs are implemented inside - // Kokkos_Parallel_Reduce.hpp. - KOKKOS_FUNCTION - void operator=(const StdPartitionCopyScalar& other) { - true_count_ = other.true_count_; - false_count_ = other.false_count_; - } - - // this is needed for - // OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp:699:21: error: no viable - // overloaded '=' m_returnvalue = 0; - // - KOKKOS_FUNCTION - void operator=(const ValueType value) { - true_count_ = value; - false_count_ = value; - } }; template ::min()} - : red_value_type{i}; + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {i}; + if (predicate_value) { + rv = {::Kokkos::reduction_identity::min()}; + } + m_reducer.join(redValue, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp index a4aaba26b9..7c75899cb8 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp @@ -39,16 +39,7 @@ struct StdReverseFunctor { KOKKOS_FUNCTION void operator()(index_type i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename InputIterator::value_type tmp = std::move(m_first[i]); - m_first[i] = std::move(m_last[-i - 1]); - m_last[-i - 1] = std::move(tmp); -#else ::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]); -#endif } StdReverseFunctor(InputIterator first, InputIterator last) diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp index a612a57231..2780151f29 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp @@ -60,9 +60,11 @@ struct StdSearchFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv = {i}; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp index 0d3b6bc706..98640136d4 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp @@ -59,9 +59,11 @@ struct StdSearchNFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp b/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp index 438acb989f..a5e4786d04 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp @@ -35,16 +35,7 @@ struct StdSwapRangesFunctor { KOKKOS_FUNCTION void operator()(IndexType i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename IteratorType1::value_type tmp = std::move(m_first1[i]); - m_first1[i] = std::move(m_first2[i]); - m_first2[i] = std::move(tmp); -#else ::Kokkos::Experimental::swap(m_first1[i], m_first2[i]); -#endif } KOKKOS_FUNCTION diff --git a/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp index 773e8c2f88..3bb337de36 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp @@ -76,6 +76,8 @@ struct TransformExclusiveScanFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp index 9dde2b0fb1..05f8589086 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp @@ -67,6 +67,8 @@ struct TransformInclusiveScanNoInitValueFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { @@ -118,6 +120,8 @@ struct TransformInclusiveScanWithInitValueFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp b/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp index 9b0d4d8244..8a73b8e0f1 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp @@ -29,12 +29,6 @@ template struct ValueWrapperForNoNeutralElement { Scalar val; bool is_initial = true; - - KOKKOS_FUNCTION - void operator=(const ValueWrapperForNoNeutralElement& rhs) { - val = rhs.val; - is_initial = rhs.is_initial; - } }; } // namespace Impl diff --git a/algorithms/unit_tests/CMakeLists.txt b/algorithms/unit_tests/CMakeLists.txt index 0fe9c2006e..92d9f072c1 100644 --- a/algorithms/unit_tests/CMakeLists.txt +++ b/algorithms/unit_tests/CMakeLists.txt @@ -16,35 +16,45 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir}) file(MAKE_DIRECTORY ${dir}) - # ------------------------- - # Sort1d,3d, Random - # ------------------------- - set(SOURCES_A) - if(Tag STREQUAL "OpenMP") - LIST(APPEND SOURCES_A - TestOpenMP_Sort1D.cpp - TestOpenMP_Sort3D.cpp - TestOpenMP_SortDynamicView.cpp - ) - endif() - + # ------------------------------------------ + # Sort + # ------------------------------------------ # Each of these inputs is an .hpp file. # Generate a .cpp file for each one that runs it on the current backend (Tag), # and add this .cpp file to the sources for UnitTest_RandomAndSort. - foreach(SOURCES_A_Input - TestRandomCommon - TestSortCommon - TestNestedSort - ) - set(file ${dir}/${SOURCES_A_Input}.cpp) + set(ALGO_SORT_SOURCES) + foreach(SOURCE_Input + TestSort + TestBinSortA + TestBinSortB + TestNestedSort + ) + set(file ${dir}/${SOURCE_Input}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. file(WRITE ${dir}/dummy.cpp "#include \n" - "#include <${SOURCES_A_Input}.hpp>\n" + "#include <${SOURCE_Input}.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ALGO_SORT_SOURCES ${file}) + endforeach() + + # ------------------------------------------ + # Random + # ------------------------------------------ + # do as above + set(ALGO_RANDOM_SOURCES) + foreach(SOURCE_Input + TestRandom + ) + set(file ${dir}/${SOURCE_Input}.cpp) + file(WRITE ${dir}/dummy.cpp + "#include \n" + "#include <${SOURCE_Input}.hpp>\n" ) configure_file(${dir}/dummy.cpp ${file}) - list(APPEND SOURCES_A ${file}) + list(APPEND ALGO_RANDOM_SOURCES ${file}) endforeach() # ------------------------------------------ @@ -145,6 +155,26 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) endif() endforeach() +# FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22 +# when compiling for Intel's Xe-HP GPUs. +# FRIZZI: 04/26/2023: not sure if the compilation error is still applicable +# but we conservatively leave this guard on +if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Sort + SOURCES + UnitTestMain.cpp + ${ALGO_SORT_SOURCES} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Random + SOURCES + UnitTestMain.cpp + ${ALGO_RANDOM_SOURCES} + ) +endif() + # FIXME_OPENMPTARGET These tests cause internal compiler errors as of 09/01/22 # when compiling for Intel's Xe-HP GPUs. if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM) @@ -160,20 +190,9 @@ if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM) ) endif() -# FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22 -# when compiling for Intel's Xe-HP GPUs. -if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_RandomAndSort - SOURCES - UnitTestMain.cpp - ${SOURCES_A} - ) -endif() - foreach(ID A;B;C;D;E) KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_StdSet_${ID} + AlgorithmsUnitTest_StdSet_${ID} SOURCES UnitTestMain.cpp ${STDALGO_SOURCES_${ID}} @@ -184,7 +203,7 @@ endforeach() # when compiling for Intel's Xe-HP GPUs. if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) KOKKOS_ADD_EXECUTABLE( - UnitTest_StdAlgoCompileOnly + AlgorithmsUnitTest_StdAlgoCompileOnly SOURCES TestStdAlgorithmsCompileOnly.cpp ) endif() diff --git a/algorithms/unit_tests/Makefile b/algorithms/unit_tests/Makefile index e961e7ba2c..9e0f1d60a0 100644 --- a/algorithms/unit_tests/Makefile +++ b/algorithms/unit_tests/Makefile @@ -27,10 +27,8 @@ TARGETS = tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ $(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\ - $(shell echo "\#include " > Test$(device).cpp); \ - $(shell echo "\#include " >> Test$(device).cpp); \ - $(shell echo "\#include " >> Test$(device).cpp); \ - ) \ + $(shell echo "\#include " > Test$(device).cpp); \ + ) \ ) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -52,7 +50,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_OpenMP TEST_TARGETS += test-openmp endif diff --git a/algorithms/unit_tests/TestBinSortA.hpp b/algorithms/unit_tests/TestBinSortA.hpp new file mode 100644 index 0000000000..46f6486cdc --- /dev/null +++ b/algorithms/unit_tests/TestBinSortA.hpp @@ -0,0 +1,280 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP + +#include +#include +#include +#include +#include + +namespace Test { +namespace BinSortSetA { + +template +struct bin3d_is_sorted_struct { + using value_type = unsigned int; + using execution_space = ExecutionSpace; + + Kokkos::View keys; + + int max_bins; + Scalar min; + Scalar max; + + bin3d_is_sorted_struct(Kokkos::View keys_, + int max_bins_, Scalar min_, Scalar max_) + : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} + KOKKOS_INLINE_FUNCTION + void operator()(int i, unsigned int& count) const { + int ix1 = int((keys(i, 0) - min) / max * max_bins); + int iy1 = int((keys(i, 1) - min) / max * max_bins); + int iz1 = int((keys(i, 2) - min) / max * max_bins); + int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); + int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); + int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); + + if (ix1 > ix2) + count++; + else if (ix1 == ix2) { + if (iy1 > iy2) + count++; + else if ((iy1 == iy2) && (iz1 > iz2)) + count++; + } + } +}; + +template +struct sum3D { + using value_type = double; + using execution_space = ExecutionSpace; + + Kokkos::View keys; + + sum3D(Kokkos::View keys_) : keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator()(int i, double& count) const { + count += keys(i, 0); + count += keys(i, 1); + count += keys(i, 2); + } +}; + +template +void test_3D_sort_impl(unsigned int n) { + using KeyViewType = Kokkos::View; + + KeyViewType keys("Keys", n * n * n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys, g, 100.0); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + ExecutionSpace exec; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0)), + sum3D(keys), sum_before); + + int bin_1d = 1; + while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; + int bin_max[3] = {bin_1d, bin_1d, bin_1d}; + typename KeyViewType::value_type min[3] = {0, 0, 0}; + typename KeyViewType::value_type max[3] = {100, 100, 100}; + + using BinOp = Kokkos::BinOp3D; + BinOp bin_op(bin_max, min, max); + Kokkos::BinSort Sorter(keys, bin_op, false); + Sorter.create_permute_vector(exec); + Sorter.sort(exec, keys); + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0)), + sum3D(keys), sum_after); + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0) - 1), + bin3d_is_sorted_struct(keys, bin_1d, min[0], + max[0]), + sort_fails); + + double ratio = sum_before / sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; + + if (sort_fails) + printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); + + ASSERT_EQ(sort_fails, 0u); + ASSERT_EQ(equal_sum, 1u); +} + +template +void test_issue_1160_impl() { + Kokkos::View element_("element", 10); + Kokkos::View x_("x", 10); + Kokkos::View v_("y", 10); + + auto h_element = Kokkos::create_mirror_view(element_); + auto h_x = Kokkos::create_mirror_view(x_); + auto h_v = Kokkos::create_mirror_view(v_); + + h_element(0) = 9; + h_element(1) = 8; + h_element(2) = 7; + h_element(3) = 6; + h_element(4) = 5; + h_element(5) = 4; + h_element(6) = 3; + h_element(7) = 2; + h_element(8) = 1; + h_element(9) = 0; + + for (int i = 0; i < 10; ++i) { + h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i)); + } + ExecutionSpace exec; + Kokkos::deep_copy(exec, element_, h_element); + Kokkos::deep_copy(exec, x_, h_x); + Kokkos::deep_copy(exec, v_, h_v); + + using KeyViewType = decltype(element_); + using BinOp = Kokkos::BinOp1D; + + int begin = 3; + int end = 8; + auto max = h_element(begin); + auto min = h_element(end - 1); + BinOp binner(end - begin, min, max); + + Kokkos::BinSort Sorter(element_, begin, end, binner, + false); + Sorter.create_permute_vector(exec); + Sorter.sort(exec, element_, begin, end); + + Sorter.sort(exec, x_, begin, end); + Sorter.sort(exec, v_, begin, end); + + Kokkos::deep_copy(exec, h_element, element_); + Kokkos::deep_copy(exec, h_x, x_); + Kokkos::deep_copy(exec, h_v, v_); + exec.fence(); + + ASSERT_EQ(h_element(0), 9); + ASSERT_EQ(h_element(1), 8); + ASSERT_EQ(h_element(2), 7); + ASSERT_EQ(h_element(3), 2); + ASSERT_EQ(h_element(4), 3); + ASSERT_EQ(h_element(5), 4); + ASSERT_EQ(h_element(6), 5); + ASSERT_EQ(h_element(7), 6); + ASSERT_EQ(h_element(8), 1); + ASSERT_EQ(h_element(9), 0); + + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(h_element(i), int(h_x.access(i, 0))); + ASSERT_EQ(h_element(i), int(h_v.access(i, 0))); + } +} + +template +void test_sort_integer_overflow() { + // FIXME: this test is meant to test something for BinSort, + // but actually uses the kokkos::sort API with the assumption + // that underneath it calls binsort. I don't think this is correct, + // because if the kokkos::sort API chages impl, this test is not testing + // what it meants to test... so need to change this to actually use BinSort + // directly. + + // array with two extrema in reverse order to expose integer overflow bug in + // bin calculation + T a[2] = {Kokkos::Experimental::finite_max::value, + Kokkos::Experimental::finite_min::value}; + auto vd = Kokkos::create_mirror_view_and_copy( + ExecutionSpace(), Kokkos::View(a)); + Kokkos::sort(vd); + auto vh = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vd); + EXPECT_TRUE(std::is_sorted(vh.data(), vh.data() + 2)) + << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; +} + +} // namespace BinSortSetA + +TEST(TEST_CATEGORY, BinSortGenericTests) { + using ExecutionSpace = TEST_EXECSPACE; + using key_type = unsigned; + constexpr int N = 171; + +#if defined(KOKKOS_ENABLE_CUDA) && \ + defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + if (!std::is_same_v) +#endif + BinSortSetA::test_3D_sort_impl(N); + +#if defined(KOKKOS_ENABLE_CUDA) && \ + defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + if (!std::is_same_v) +#endif + BinSortSetA::test_issue_1160_impl(); + + BinSortSetA::test_sort_integer_overflow(); + BinSortSetA::test_sort_integer_overflow(); + BinSortSetA::test_sort_integer_overflow(); +} + +TEST(TEST_CATEGORY, BinSortEmptyView) { + using ExecutionSpace = TEST_EXECSPACE; + + // the bounds and extents used below are totally arbitrary + // and, in theory, should have no impact + + using KeyViewType = Kokkos::View; + KeyViewType kv("kv", 20); + + using BinOp_t = Kokkos::BinOp1D; + BinOp_t binOp(5, 0, 10); + Kokkos::BinSort Sorter(ExecutionSpace{}, kv, binOp); + + // does not matter if we use int or something else + Kokkos::View v("v", 0); + + // test all exposed public sort methods + ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v, 0, 0)); + ASSERT_NO_THROW(Sorter.sort(v, 0, 0)); + ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v)); + ASSERT_NO_THROW(Sorter.sort(v)); +} + +TEST(TEST_CATEGORY, BinSortEmptyKeysView) { + using ExecutionSpace = TEST_EXECSPACE; + + using KeyViewType = Kokkos::View; + KeyViewType kv("kv", 0); + + using BinOp_t = Kokkos::BinOp1D; + BinOp_t binOp(5, 0, 10); + Kokkos::BinSort Sorter(ExecutionSpace{}, kv, binOp); + + ASSERT_NO_THROW(Sorter.create_permute_vector(ExecutionSpace{})); +} + +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestBinSortB.hpp b/algorithms/unit_tests/TestBinSortB.hpp new file mode 100644 index 0000000000..0707411f59 --- /dev/null +++ b/algorithms/unit_tests/TestBinSortB.hpp @@ -0,0 +1,262 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP + +#include +#include +#include +#include +#include +#include +#include +#include //needed for iota + +namespace Test { +namespace BinSortSetB { + +template +struct CopyFunctorRank2 { + ViewTypeFrom m_view_from; + ViewTypeTo m_view_to; + + CopyFunctorRank2() = delete; + + CopyFunctorRank2(const ViewTypeFrom view_from, const ViewTypeTo view_to) + : m_view_from(view_from), m_view_to(view_to) {} + + KOKKOS_INLINE_FUNCTION + void operator()(int k) const { + const auto i = k / m_view_from.extent(1); + const auto j = k % m_view_from.extent(1); + m_view_to(i, j) = m_view_from(i, j); + } +}; + +template +auto create_deep_copyable_compatible_view_with_same_extent( + Kokkos::View view) { + using view_type = Kokkos::View; + using view_value_type = typename view_type::value_type; + using view_exespace = typename view_type::execution_space; + const std::size_t ext0 = view.extent(0); + using view_deep_copyable_t = Kokkos::View; + return view_deep_copyable_t{"view_dc", ext0}; +} + +template +auto create_deep_copyable_compatible_view_with_same_extent( + Kokkos::View view) { + using view_type = Kokkos::View; + using view_value_type = typename view_type::value_type; + using view_exespace = typename view_type::execution_space; + using view_deep_copyable_t = Kokkos::View; + const std::size_t ext0 = view.extent(0); + const std::size_t ext1 = view.extent(1); + return view_deep_copyable_t{"view_dc", ext0, ext1}; +} + +template +auto create_deep_copyable_compatible_clone(ViewType view) { + static_assert(ViewType::rank <= 2); + + auto view_dc = create_deep_copyable_compatible_view_with_same_extent(view); + using view_dc_t = decltype(view_dc); + if constexpr (ViewType::rank == 1) { + Test::stdalgos::CopyFunctor F1(view, view_dc); + Kokkos::parallel_for("copy", view.extent(0), F1); + } else { + static_assert(ViewType::rank == 2, "Only rank 1 or 2 supported."); + CopyFunctorRank2 F1(view, view_dc); + Kokkos::parallel_for("copy", view.extent(0) * view.extent(1), F1); + } + return view_dc; +} + +template +auto create_host_space_copy(ViewType view) { + auto view_dc = create_deep_copyable_compatible_clone(view); + return create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); +} + +template +auto create_rank1_dev_and_host_views_of_keys(const ExecutionSpace& exec, + int N) { + namespace KE = Kokkos::Experimental; + Kokkos::DefaultHostExecutionSpace defaultHostExeSpace; + + using KeyViewType = Kokkos::View; + KeyViewType keys("keys", N); + auto keys_h = Kokkos::create_mirror_view(keys); + std::iota(KE::begin(keys_h), KE::end(keys_h), KeyType(0)); + KE::reverse(defaultHostExeSpace, keys_h); + // keys now is = [N-1,N-2,...,2,1,0], shuffle it for avoid trivial case + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(KE::begin(keys_h), KE::end(keys_h), g); + Kokkos::deep_copy(exec, keys, keys_h); + + return std::make_pair(keys, keys_h); +} + +template = 0> +auto create_strided_view(std::size_t numRows, std::size_t /*numCols*/) { + Kokkos::LayoutStride layout{numRows, 2}; + using v_t = Kokkos::View; + v_t v("v", layout); + return v; +} + +template = 0> +auto create_strided_view(std::size_t numRows, std::size_t numCols) { + Kokkos::LayoutStride layout{numRows, 2, numCols, numRows * 2}; + using v_t = Kokkos::View; + v_t v("v", layout); + return v; +} + +template +void test_on_view_with_stride(std::size_t numRows, std::size_t indB, + std::size_t indE, std::size_t numCols = 1) { + ExecutionSpace exec; + Kokkos::DefaultHostExecutionSpace defaultHostExeSpace; + namespace KE = Kokkos::Experimental; + + // 1. generate 1D view of keys + auto [keys, keys_h] = + create_rank1_dev_and_host_views_of_keys(exec, numRows); + using KeyViewType = decltype(keys); + + // need this map key->row to use later for checking + std::unordered_map keyToRowBeforeSort; + for (std::size_t i = 0; i < numRows; ++i) { + keyToRowBeforeSort[keys_h(i)] = i; + } + + // 2. create binOp + using BinOp = Kokkos::BinOp1D; + auto itB = KE::cbegin(keys_h) + indB; + auto itE = itB + indE - indB; + auto it = KE::minmax_element(defaultHostExeSpace, itB, itE); + // seems like the behavior is odd when we use # buckets = # keys + // so use +5 for using more buckets than keys. + // This is something to investigate. + BinOp binner(indE - indB + 5, *it.first, *it.second); + + // 3. create sorter + Kokkos::BinSort sorter(keys, indB, indE, binner, false); + sorter.create_permute_vector(exec); + sorter.sort(exec, keys, indB, indE); + Kokkos::deep_copy(exec, keys_h, keys); + + auto v = create_strided_view( + numRows, numCols); + + Kokkos::Random_XorShift64_Pool pool(73931); + Kokkos::fill_random(v, pool, ValueType(545)); + auto v_before_sort_h = create_host_space_copy(v); + sorter.sort(exec, v, indB, indE); + auto v_after_sort_h = create_host_space_copy(v); + + for (size_t i = 0; i < v.extent(0); ++i) { + // if i within [indB,indE), the sorting was done + // so we need to do proper checking since rows have changed + if (i >= size_t(indB) && i < size_t(indE)) { + const KeyType key = keys_h(i); + if constexpr (ValuesViewRank == 1) { + ASSERT_TRUE(v_before_sort_h(keyToRowBeforeSort.at(key)) == + v_after_sort_h(i)); + } else { + for (size_t j = 0; j < v.extent(1); ++j) { + ASSERT_TRUE(v_before_sort_h(keyToRowBeforeSort.at(key), j) == + v_after_sort_h(i, j)); + } + } + } + // outside the target bounds, then the i-th row remains unchanged + else { + if constexpr (ValuesViewRank == 1) { + ASSERT_TRUE(v_before_sort_h(i) == v_after_sort_h(i)); + } else { + for (size_t j = 0; j < v.extent(1); ++j) { + ASSERT_TRUE(v_before_sort_h(i, j) == v_after_sort_h(i, j)); + } + } + } + } +} + +template +void run_for_rank1() { + constexpr int rank = 1; + + // trivial case + test_on_view_with_stride(1, 0, 1); + + // nontrivial cases + for (std::size_t N : {311, 710017}) { + // various cases for bounds + test_on_view_with_stride(N, 0, N); + test_on_view_with_stride(N, 3, N); + test_on_view_with_stride(N, 0, + N - 4); + test_on_view_with_stride(N, 4, + N - 3); + } +} + +template +void run_for_rank2() { + constexpr int rank = 2; + + // trivial case + test_on_view_with_stride(1, 0, 1, + 1); + + // nontrivial cases + for (std::size_t Nr : {11, 1157, 710017}) { + for (std::size_t Nc : {3, 51}) { + // various cases for bounds + test_on_view_with_stride( + Nr, 0, Nr, Nc); + test_on_view_with_stride( + Nr, 3, Nr, Nc); + test_on_view_with_stride( + Nr, 0, Nr - 4, Nc); + test_on_view_with_stride( + Nr, 4, Nr - 3, Nc); + } + } +} + +} // namespace BinSortSetB + +TEST(TEST_CATEGORY, BinSortUnsignedKeyLayoutStrideValues) { + using ExeSpace = TEST_EXECSPACE; + using key_type = unsigned; + BinSortSetB::run_for_rank1(); + BinSortSetB::run_for_rank1(); + + BinSortSetB::run_for_rank2(); + BinSortSetB::run_for_rank2(); +} + +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestNestedSort.hpp b/algorithms/unit_tests/TestNestedSort.hpp index 37ee211b42..1b7a3f48fc 100644 --- a/algorithms/unit_tests/TestNestedSort.hpp +++ b/algorithms/unit_tests/TestNestedSort.hpp @@ -17,14 +17,14 @@ #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP +#include #include #include #include #include namespace Test { - -namespace Impl { +namespace NestedSortImpl { // Comparator for sorting in descending order template @@ -383,24 +383,28 @@ void test_nested_sort_by_key(unsigned int N, KeyType minKey, KeyType maxKey, test_nested_sort_by_key_impl( N, N, false, true, minKey, maxKey, minVal, maxVal); } -} // namespace Impl +} // namespace NestedSortImpl TEST(TEST_CATEGORY, NestedSort) { - Impl::test_nested_sort(171, 0U, UINT_MAX); - Impl::test_nested_sort(42, -1e6f, 1e6f); - Impl::test_nested_sort(67, CHAR_MIN, CHAR_MAX); + using ExecutionSpace = TEST_EXECSPACE; + NestedSortImpl::test_nested_sort(171, 0U, UINT_MAX); + NestedSortImpl::test_nested_sort(42, -1e6f, 1e6f); + NestedSortImpl::test_nested_sort(67, CHAR_MIN, + CHAR_MAX); } TEST(TEST_CATEGORY, NestedSortByKey) { + using ExecutionSpace = TEST_EXECSPACE; + // Second/third template arguments are key and value respectively. // In sort_by_key_X functions, a key view and a value view are both permuted // to make the keys sorted. This means that the value type doesn't need to be // ordered, unlike key - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 161, 0U, UINT_MAX, 0U, UINT_MAX); - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 267, -1e6f, 1e6f, CHAR_MIN, CHAR_MAX); - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 11, CHAR_MIN, CHAR_MAX, 2.718, 3.14); } diff --git a/algorithms/unit_tests/TestOpenMP_Sort1D.cpp b/algorithms/unit_tests/TestOpenMP_Sort1D.cpp deleted file mode 100644 index e06486618f..0000000000 --- a/algorithms/unit_tests/TestOpenMP_Sort1D.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#ifdef KOKKOS_ENABLE_OPENMP - -#include -#include - -//---------------------------------------------------------------------------- -#include -#include -#include - -namespace Test { - -TEST(openmp, SortUnsigned1D) { - Impl::test_1D_sort(171); -} - -TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort(); } - -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} -#endif diff --git a/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp b/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp deleted file mode 100644 index 549d09f1f2..0000000000 --- a/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#ifdef KOKKOS_ENABLE_OPENMP - -#include -#include - -//---------------------------------------------------------------------------- -#include -#include -#include - -namespace Test { - -TEST(openmp, SortUnsignedDynamicView) { - Impl::test_dynamic_view_sort(171); -} - -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} -#endif diff --git a/algorithms/unit_tests/TestRandom.hpp b/algorithms/unit_tests/TestRandom.hpp index 607e94c784..e9dc3327a6 100644 --- a/algorithms/unit_tests/TestRandom.hpp +++ b/algorithms/unit_tests/TestRandom.hpp @@ -14,8 +14,8 @@ // //@HEADER -#ifndef KOKKOS_TEST_DUALVIEW_HPP -#define KOKKOS_TEST_DUALVIEW_HPP +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP #include #include @@ -29,8 +29,7 @@ #include namespace Test { - -namespace Impl { +namespace AlgoRandomImpl { // This test runs the random number generators and uses some statistic tests to // check the 'goodness' of the random numbers: @@ -469,42 +468,46 @@ struct TestDynRankView { ASSERT_LE(val.max_val, max); } }; -} // namespace Impl -template -void test_random_xorshift64() { +} // namespace AlgoRandomImpl + +TEST(TEST_CATEGORY, Random_XorShift64) { + using ExecutionSpace = TEST_EXECSPACE; + #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ defined(KOKKOS_ENABLE_HIP) const int num_draws = 132141141; #else // SERIAL, HPX, OPENMP const int num_draws = 10240000; #endif - Impl::test_random>(num_draws); - Impl::test_random>( + num_draws); + AlgoRandomImpl::test_random>>( num_draws); - Impl::TestDynRankView>(10000) + AlgoRandomImpl::TestDynRankView< + ExecutionSpace, Kokkos::Random_XorShift64_Pool>(10000) .run(); } -template -void test_random_xorshift1024() { +TEST(TEST_CATEGORY, Random_XorShift1024_0) { + using ExecutionSpace = TEST_EXECSPACE; + #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ defined(KOKKOS_ENABLE_HIP) const int num_draws = 52428813; #else // SERIAL, HPX, OPENMP const int num_draws = 10130144; #endif - Impl::test_random>( + AlgoRandomImpl::test_random>( num_draws); - Impl::test_random>>( num_draws); - Impl::TestDynRankView>(10000) + AlgoRandomImpl::TestDynRankView< + ExecutionSpace, Kokkos::Random_XorShift1024_Pool>(10000) .run(); } -} // namespace Test -#endif // KOKKOS_TEST_UNORDERED_MAP_HPP +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestRandomAccessIterator.cpp b/algorithms/unit_tests/TestRandomAccessIterator.cpp index 439d171c8a..fd3a875b1e 100644 --- a/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -54,7 +54,7 @@ void test_random_access_it_verify(IteratorType it, ValueType gold_value) { Kokkos::parallel_for("_std_algo_copy", 1, cf); auto v_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView); - EXPECT_EQ(v_h(), gold_value); + ASSERT_EQ(v_h(), gold_value); } TEST_F(random_access_iterator_test, dereference) { @@ -96,9 +96,9 @@ void test_random_access_it_subscript_op_verify(IteratorType it) { auto v_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView); - EXPECT_EQ(v_h(0), (value_t)0); - EXPECT_EQ(v_h(1), (value_t)1); - EXPECT_EQ(v_h(2), (value_t)2); + ASSERT_EQ(v_h(0), (value_t)0); + ASSERT_EQ(v_h(1), (value_t)1); + ASSERT_EQ(v_h(2), (value_t)2); } TEST_F(random_access_iterator_test, subscript_operator) { @@ -188,9 +188,9 @@ TEST_F(random_access_iterator_test, operatorsSet4) { auto it7 = KE::Impl::RandomAccessIterator(m_static_view, 3); auto it8 = KE::Impl::RandomAccessIterator(m_dynamic_view, 3); auto it9 = KE::Impl::RandomAccessIterator(m_strided_view, 3); - EXPECT_EQ(it1, it7); - EXPECT_EQ(it2, it8); - EXPECT_EQ(it3, it9); + ASSERT_EQ(it1, it7); + ASSERT_EQ(it2, it8); + ASSERT_EQ(it3, it9); EXPECT_GE(it1, it7); EXPECT_GE(it2, it8); EXPECT_GE(it3, it9); @@ -205,16 +205,16 @@ TEST_F(random_access_iterator_test, assignment_operator) { EXPECT_NE(it1, it2); it2 = it1; - EXPECT_EQ(it1, it2); + ASSERT_EQ(it1, it2); } TEST_F(random_access_iterator_test, distance) { auto first = KE::begin(m_dynamic_view); auto last = KE::end(m_dynamic_view); - EXPECT_EQ(0, KE::distance(first, first)); - EXPECT_EQ(1, KE::distance(first, first + 1)); - EXPECT_EQ(m_dynamic_view.extent(0), size_t(KE::distance(first, last))); + ASSERT_EQ(0, KE::distance(first, first)); + ASSERT_EQ(1, KE::distance(first, first + 1)); + ASSERT_EQ(m_dynamic_view.extent(0), size_t(KE::distance(first, last))); } } // namespace stdalgos diff --git a/algorithms/unit_tests/TestSort.hpp b/algorithms/unit_tests/TestSort.hpp index d903888878..968fb8950b 100644 --- a/algorithms/unit_tests/TestSort.hpp +++ b/algorithms/unit_tests/TestSort.hpp @@ -14,8 +14,8 @@ // //@HEADER -#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP -#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP #include #include @@ -24,8 +24,7 @@ #include namespace Test { - -namespace Impl { +namespace SortImpl { template struct is_sorted_struct { @@ -53,56 +52,6 @@ struct sum { void operator()(int i, double& count) const { count += keys(i); } }; -template -struct bin3d_is_sorted_struct { - using value_type = unsigned int; - using execution_space = ExecutionSpace; - - Kokkos::View keys; - - int max_bins; - Scalar min; - Scalar max; - - bin3d_is_sorted_struct(Kokkos::View keys_, - int max_bins_, Scalar min_, Scalar max_) - : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} - KOKKOS_INLINE_FUNCTION - void operator()(int i, unsigned int& count) const { - int ix1 = int((keys(i, 0) - min) / max * max_bins); - int iy1 = int((keys(i, 1) - min) / max * max_bins); - int iz1 = int((keys(i, 2) - min) / max * max_bins); - int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); - int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); - int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); - - if (ix1 > ix2) - count++; - else if (ix1 == ix2) { - if (iy1 > iy2) - count++; - else if ((iy1 == iy2) && (iz1 > iz2)) - count++; - } - } -}; - -template -struct sum3D { - using value_type = double; - using execution_space = ExecutionSpace; - - Kokkos::View keys; - - sum3D(Kokkos::View keys_) : keys(keys_) {} - KOKKOS_INLINE_FUNCTION - void operator()(int i, double& count) const { - count += keys(i, 0); - count += keys(i, 1); - count += keys(i, 2); - } -}; - template void test_1D_sort_impl(unsigned int n) { using KeyViewType = Kokkos::View; @@ -142,57 +91,6 @@ void test_1D_sort_impl(unsigned int n) { ASSERT_EQ(equal_sum, 1u); } -template -void test_3D_sort_impl(unsigned int n) { - using KeyViewType = Kokkos::View; - - KeyViewType keys("Keys", n * n * n); - - Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys, g, 100.0); - - double sum_before = 0.0; - double sum_after = 0.0; - unsigned int sort_fails = 0; - - ExecutionSpace exec; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0)), - sum3D(keys), sum_before); - - int bin_1d = 1; - while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; - int bin_max[3] = {bin_1d, bin_1d, bin_1d}; - typename KeyViewType::value_type min[3] = {0, 0, 0}; - typename KeyViewType::value_type max[3] = {100, 100, 100}; - - using BinOp = Kokkos::BinOp3D; - BinOp bin_op(bin_max, min, max); - Kokkos::BinSort Sorter(keys, bin_op, false); - Sorter.create_permute_vector(exec); - Sorter.sort(exec, keys); - - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0)), - sum3D(keys), sum_after); - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0) - 1), - bin3d_is_sorted_struct(keys, bin_1d, min[0], - max[0]), - sort_fails); - - double ratio = sum_before / sum_after; - double epsilon = 1e-10; - unsigned int equal_sum = - (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; - - if (sort_fails) - printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); - - ASSERT_EQ(sort_fails, 0u); - ASSERT_EQ(equal_sum, 1u); -} - //---------------------------------------------------------------------------- template @@ -259,74 +157,6 @@ void test_dynamic_view_sort_impl(unsigned int n) { //---------------------------------------------------------------------------- -template -void test_issue_1160_impl() { - Kokkos::View element_("element", 10); - Kokkos::View x_("x", 10); - Kokkos::View v_("y", 10); - - auto h_element = Kokkos::create_mirror_view(element_); - auto h_x = Kokkos::create_mirror_view(x_); - auto h_v = Kokkos::create_mirror_view(v_); - - h_element(0) = 9; - h_element(1) = 8; - h_element(2) = 7; - h_element(3) = 6; - h_element(4) = 5; - h_element(5) = 4; - h_element(6) = 3; - h_element(7) = 2; - h_element(8) = 1; - h_element(9) = 0; - - for (int i = 0; i < 10; ++i) { - h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i)); - } - ExecutionSpace exec; - Kokkos::deep_copy(exec, element_, h_element); - Kokkos::deep_copy(exec, x_, h_x); - Kokkos::deep_copy(exec, v_, h_v); - - using KeyViewType = decltype(element_); - using BinOp = Kokkos::BinOp1D; - - int begin = 3; - int end = 8; - auto max = h_element(begin); - auto min = h_element(end - 1); - BinOp binner(end - begin, min, max); - - Kokkos::BinSort Sorter(element_, begin, end, binner, - false); - Sorter.create_permute_vector(exec); - Sorter.sort(exec, element_, begin, end); - - Sorter.sort(exec, x_, begin, end); - Sorter.sort(exec, v_, begin, end); - - Kokkos::deep_copy(exec, h_element, element_); - Kokkos::deep_copy(exec, h_x, x_); - Kokkos::deep_copy(exec, h_v, v_); - exec.fence(); - - ASSERT_EQ(h_element(0), 9); - ASSERT_EQ(h_element(1), 8); - ASSERT_EQ(h_element(2), 7); - ASSERT_EQ(h_element(3), 2); - ASSERT_EQ(h_element(4), 3); - ASSERT_EQ(h_element(5), 4); - ASSERT_EQ(h_element(6), 5); - ASSERT_EQ(h_element(7), 6); - ASSERT_EQ(h_element(8), 1); - ASSERT_EQ(h_element(9), 0); - - for (int i = 0; i < 10; ++i) { - ASSERT_EQ(h_element(i), int(h_x.access(i, 0))); - ASSERT_EQ(h_element(i), int(h_v.access(i, 0))); - } -} - template void test_issue_4978_impl() { Kokkos::View element_("element", 9); @@ -376,55 +206,33 @@ void test_sort_integer_overflow() { << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; } -//---------------------------------------------------------------------------- +} // namespace SortImpl -template -void test_1D_sort(unsigned int N) { - test_1D_sort_impl(N * N * N); -} +TEST(TEST_CATEGORY, SortUnsignedValueType) { + using ExecutionSpace = TEST_EXECSPACE; + using key_type = unsigned; + constexpr int N = 171; -template -void test_3D_sort(unsigned int N) { - test_3D_sort_impl(N); -} + SortImpl::test_1D_sort_impl(N * N * N); -template -void test_dynamic_view_sort(unsigned int N) { - test_dynamic_view_sort_impl(N * N); -} +#ifndef KOKKOS_ENABLE_OPENMPTARGET + // FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet. + SortImpl::test_dynamic_view_sort_impl(N * N); +#endif -template -void test_issue_1160_sort() { - test_issue_1160_impl(); + SortImpl::test_issue_4978_impl(); } -template -void test_issue_4978_sort() { - test_issue_4978_impl(); -} +TEST(TEST_CATEGORY, SortEmptyView) { + using ExecutionSpace = TEST_EXECSPACE; -template -void test_sort(unsigned int N) { - test_1D_sort(N); -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if (!std::is_same_v) -#endif - test_3D_sort(N); -// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet. -#ifndef KOKKOS_ENABLE_OPENMPTARGET - test_dynamic_view_sort(N); -#endif -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if (!std::is_same_v) -#endif - test_issue_1160_sort(); - test_issue_4978_sort(); - test_sort_integer_overflow(); - test_sort_integer_overflow(); - test_sort_integer_overflow(); + // does not matter if we use int or something else + Kokkos::View v("v", 0); + + // TODO check the synchronous behavior of the calls below + ASSERT_NO_THROW(Kokkos::sort(ExecutionSpace(), v)); + ASSERT_NO_THROW(Kokkos::sort(v)); } -} // namespace Impl + } // namespace Test -#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ +#endif diff --git a/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp b/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp index d414d524b6..75ad533f6e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp @@ -157,7 +157,7 @@ void verify_data(TestViewType test_view, GoldViewType gold) { const auto gold_h = create_mirror_view_and_copy(Kokkos::HostSpace(), gold); for (std::size_t i = 0; i < test_view.extent(0); ++i) { - EXPECT_EQ(gold_h(i), test_view_dc_h(i)); + ASSERT_EQ(gold_h(i), test_view_dc_h(i)); } } @@ -197,7 +197,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res1 = KE::adjacent_difference(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(res1, KE::end(view_dest)); + ASSERT_EQ(res1, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -207,7 +207,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res2 = KE::adjacent_difference( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(res2, KE::end(view_dest)); + ASSERT_EQ(res2, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -216,7 +216,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view(Tag{}, view_ext, "adj_diff_dest_view"); auto res3 = KE::adjacent_difference(exespace(), view_from, view_dest, args...); - EXPECT_EQ(res3, KE::end(view_dest)); + ASSERT_EQ(res3, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -225,7 +225,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view(Tag{}, view_ext, "adj_diff_dest_view"); auto res4 = KE::adjacent_difference("label", exespace(), view_from, view_dest, args...); - EXPECT_EQ(res4, KE::end(view_dest)); + ASSERT_EQ(res4, KE::end(view_dest)); verify_data(view_dest, gold); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp b/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp index ee34761265..fa4ff48dbe 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp @@ -229,7 +229,7 @@ void verify(DiffType my_diff, ViewType view, Args... args) { my_std_adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...); const auto std_diff = std_r - KE::cbegin(view_h); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); } template @@ -287,12 +287,6 @@ void run_all_scenarios() { } TEST(std_algorithms_nonmod_seq_ops, adjacent_find) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp b/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp index 1c39a4735e..cccc0f6c18 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp @@ -147,12 +147,6 @@ void run_all_scenarios() { } TEST(std_algorithms_all_any_none_of_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index 694676a878..5b30b9eda7 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -110,7 +110,7 @@ verify_values(ValueType expected, const ViewType view) { "Non-matching value types of view and reference value"); auto view_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), view); for (std::size_t i = 0; i < view_h.extent(0); i++) { - EXPECT_EQ(expected, view_h(i)); + ASSERT_EQ(expected, view_h(i)); } } @@ -130,7 +130,7 @@ verify_values(ValueType expected, const ViewType view) { auto view_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), tmpView); for (std::size_t i = 0; i < view_h.extent(0); i++) { - EXPECT_EQ(expected, view_h(i)); + ASSERT_EQ(expected, view_h(i)); } } @@ -147,7 +147,7 @@ compare_views(ViewType1 expected, const ViewType2 actual) { Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), actual); for (std::size_t i = 0; i < expected_h.extent(0); i++) { - EXPECT_EQ(expected_h(i), actual_h(i)); + ASSERT_EQ(expected_h(i), actual_h(i)); } } @@ -171,7 +171,7 @@ compare_views(ViewType1 expected, const ViewType2 actual) { Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), expected); for (std::size_t i = 0; i < expected_h.extent(0); i++) { - EXPECT_EQ(expected_h(i), actual_h(i)); + ASSERT_EQ(expected_h(i), actual_h(i)); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 5d55199801..386d533f7a 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -42,8 +42,8 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) { using strided_view_1d_t = Kokkos::View; Kokkos::LayoutStride layout1d{extent0, 2}; strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d}; - EXPECT_EQ(layout1d.dimension[0], 13u); - EXPECT_EQ(layout1d.stride[0], 2u); + ASSERT_EQ(layout1d.dimension[0], 13u); + ASSERT_EQ(layout1d.stride[0], 2u); // they are admissible KE::Impl::static_assert_is_admissible_to_kokkos_std_algorithms( static_view_1d); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp index e21d50f69b..5778e37be0 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp @@ -135,49 +135,49 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_test_h(0), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(2)); } else if (name == "two-elements-a") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(0)); } else if (name == "two-elements-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(0)); } else if (name == "small-a") { - EXPECT_EQ(view_test_h(0), static_cast(-4)); - EXPECT_EQ(view_test_h(1), static_cast(-2)); - EXPECT_EQ(view_test_h(2), static_cast(0)); - EXPECT_EQ(view_test_h(3), static_cast(2)); - EXPECT_EQ(view_test_h(4), static_cast(4)); - EXPECT_EQ(view_test_h(5), static_cast(0)); - EXPECT_EQ(view_test_h(6), static_cast(0)); - EXPECT_EQ(view_test_h(7), static_cast(0)); - EXPECT_EQ(view_test_h(8), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(-4)); + ASSERT_EQ(view_test_h(1), static_cast(-2)); + ASSERT_EQ(view_test_h(2), static_cast(0)); + ASSERT_EQ(view_test_h(3), static_cast(2)); + ASSERT_EQ(view_test_h(4), static_cast(4)); + ASSERT_EQ(view_test_h(5), static_cast(0)); + ASSERT_EQ(view_test_h(6), static_cast(0)); + ASSERT_EQ(view_test_h(7), static_cast(0)); + ASSERT_EQ(view_test_h(8), static_cast(0)); } else if (name == "small-b") { - EXPECT_EQ(view_test_h(0), static_cast(22)); - EXPECT_EQ(view_test_h(1), static_cast(-12)); - EXPECT_EQ(view_test_h(2), static_cast(22)); - EXPECT_EQ(view_test_h(3), static_cast(-12)); - EXPECT_EQ(view_test_h(4), static_cast(22)); - EXPECT_EQ(view_test_h(5), static_cast(-12)); - EXPECT_EQ(view_test_h(6), static_cast(22)); - EXPECT_EQ(view_test_h(7), static_cast(-12)); - EXPECT_EQ(view_test_h(8), static_cast(22)); - EXPECT_EQ(view_test_h(9), static_cast(-12)); - EXPECT_EQ(view_test_h(10), static_cast(22)); - EXPECT_EQ(view_test_h(11), static_cast(-12)); - EXPECT_EQ(view_test_h(12), static_cast(22)); + ASSERT_EQ(view_test_h(0), static_cast(22)); + ASSERT_EQ(view_test_h(1), static_cast(-12)); + ASSERT_EQ(view_test_h(2), static_cast(22)); + ASSERT_EQ(view_test_h(3), static_cast(-12)); + ASSERT_EQ(view_test_h(4), static_cast(22)); + ASSERT_EQ(view_test_h(5), static_cast(-12)); + ASSERT_EQ(view_test_h(6), static_cast(22)); + ASSERT_EQ(view_test_h(7), static_cast(-12)); + ASSERT_EQ(view_test_h(8), static_cast(22)); + ASSERT_EQ(view_test_h(9), static_cast(-12)); + ASSERT_EQ(view_test_h(10), static_cast(22)); + ASSERT_EQ(view_test_h(11), static_cast(-12)); + ASSERT_EQ(view_test_h(12), static_cast(22)); } else if (name == "medium" || name == "large") { @@ -190,14 +190,14 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, std::size_t count = 0; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { if (pred(view_from_h(i))) { - EXPECT_EQ(view_test_h(count), view_from_h(i)); + ASSERT_EQ(view_test_h(count), view_from_h(i)); count++; } } // all other entries of test view should be zero for (; count < view_test_h.extent(0); ++count) { // std::cout << count << '\n'; - EXPECT_EQ(view_test_h(count), value_type(0)); + ASSERT_EQ(view_test_h(count), value_type(0)); } } @@ -226,7 +226,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -235,7 +235,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -243,7 +243,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if(exespace(), view_from, view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -251,7 +251,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if("label", exespace(), view_from, view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCount.cpp b/algorithms/unit_tests/TestStdAlgorithmsCount.cpp index 9423d2e15a..32e9883709 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCount.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCount.cpp @@ -35,13 +35,13 @@ void test_count(const ViewType view) { const value_t count_value = 0; const auto std_result = std::count(KE::cbegin(expected), KE::cend(expected), count_value); - EXPECT_EQ(view.extent(0), size_t(std_result)); + ASSERT_EQ(view.extent(0), size_t(std_result)); // pass const iterators - EXPECT_EQ(std_result, KE::count(exespace(), KE::cbegin(view), + ASSERT_EQ(std_result, KE::count(exespace(), KE::cbegin(view), KE::cend(view), count_value)); // pass view - EXPECT_EQ(std_result, KE::count(exespace(), view, count_value)); + ASSERT_EQ(std_result, KE::count(exespace(), view, count_value)); } { @@ -50,10 +50,10 @@ void test_count(const ViewType view) { std::count(KE::cbegin(expected), KE::cend(expected), count_value); // pass iterators - EXPECT_EQ(std_result, KE::count("label", exespace(), KE::begin(view), + ASSERT_EQ(std_result, KE::count("label", exespace(), KE::begin(view), KE::end(view), count_value)); // pass view - EXPECT_EQ(std_result, KE::count("label", exespace(), view, count_value)); + ASSERT_EQ(std_result, KE::count("label", exespace(), view, count_value)); } } @@ -67,24 +67,24 @@ void test_count_if(const ViewType view) { // no positive elements (all zeroes) const auto predicate = IsPositiveFunctor(); - EXPECT_EQ(0, + ASSERT_EQ(0, std::count_if(KE::begin(expected), KE::end(expected), predicate)); // pass iterators - EXPECT_EQ( + ASSERT_EQ( 0, KE::count_if(exespace(), KE::begin(view), KE::end(view), predicate)); // pass view - EXPECT_EQ(0, KE::count_if(exespace(), view, predicate)); + ASSERT_EQ(0, KE::count_if(exespace(), view, predicate)); fill_views_inc(view, expected); const auto std_result = std::count_if(KE::begin(expected), KE::end(expected), predicate); // pass const iterators - EXPECT_EQ(std_result, KE::count_if("label", exespace(), KE::cbegin(view), + ASSERT_EQ(std_result, KE::count_if("label", exespace(), KE::cbegin(view), KE::cend(view), predicate)); // pass view - EXPECT_EQ(std_result, KE::count_if("label", exespace(), view, predicate)); + ASSERT_EQ(std_result, KE::count_if("label", exespace(), view, predicate)); } template diff --git a/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index 4969541a02..799de8b0c4 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -157,7 +157,7 @@ void verify_data(ViewType1 data_view, // contains data // << gold_h(i) << " " << test_view_h(i) << " " // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(static_cast(gold_h(i) - test_view_h(i))); @@ -213,7 +213,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -222,14 +222,14 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } { fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -237,7 +237,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -263,7 +263,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -272,7 +272,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -280,7 +280,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -288,7 +288,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -344,6 +344,46 @@ TEST(std_algorithms_numeric_ops_test, exclusive_scan) { run_exclusive_scan_all_scenarios(); } +TEST(std_algorithms_numeric_ops_test, exclusive_scan_functor) { + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using functor_type = Kokkos::Experimental::Impl::ExclusiveScanDefaultFunctor< + exespace, int, int, view_type, view_type>; + functor_type functor(dummy, dummy_view, dummy_view); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 1; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 3); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace EScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsFind.cpp b/algorithms/unit_tests/TestStdAlgorithmsFind.cpp index 3b8b5e85af..2692df6982 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFind.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFind.cpp @@ -34,14 +34,14 @@ void test_find(const ViewType view) { constexpr value_t find_value = 13; // value not found, return last - EXPECT_EQ(KE::end(expected), + ASSERT_EQ(KE::end(expected), std::find(KE::begin(expected), KE::end(expected), find_value)); // pass const iterators, returns const iterator - EXPECT_EQ(KE::cend(view), + ASSERT_EQ(KE::cend(view), KE::find(exespace(), KE::cbegin(view), KE::cend(view), find_value)); // pass view, returns iterator - EXPECT_EQ(KE::end(view), KE::find(exespace(), view, find_value)); + ASSERT_EQ(KE::end(view), KE::find(exespace(), view, find_value)); fill_views_inc(view, expected); @@ -50,10 +50,10 @@ void test_find(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass iterators, returns iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find(exespace(), KE::begin(view), KE::end(view), find_value)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view) + distance, KE::find(exespace(), view, find_value)); + ASSERT_EQ(KE::begin(view) + distance, KE::find(exespace(), view, find_value)); } template @@ -67,15 +67,15 @@ void test_find_if(const ViewType view) { const auto not_equals_zero = NotEqualsZeroFunctor(); // value not found, return last - EXPECT_EQ( + ASSERT_EQ( KE::end(expected), std::find_if(KE::begin(expected), KE::end(expected), not_equals_zero)); // pass iterators, returns iterator - EXPECT_EQ(KE::end(view), KE::find_if(exespace(), KE::begin(view), + ASSERT_EQ(KE::end(view), KE::find_if(exespace(), KE::begin(view), KE::end(view), not_equals_zero)); // pass view, returns iterator - EXPECT_EQ(KE::end(view), KE::find_if(exespace(), view, not_equals_zero)); + ASSERT_EQ(KE::end(view), KE::find_if(exespace(), view, not_equals_zero)); fill_views_inc(view, expected); @@ -86,11 +86,11 @@ void test_find_if(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass const iterators, returns const iterator - EXPECT_EQ( + ASSERT_EQ( KE::cbegin(view) + distance, KE::find_if(exespace(), KE::cbegin(view), KE::cend(view), equals_val)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find_if(exespace(), view, equals_val)); } @@ -105,15 +105,15 @@ void test_find_if_not(const ViewType view) { const auto not_equals_zero = NotEqualsZeroFunctor(); // first value matches - EXPECT_EQ(KE::begin(expected), + ASSERT_EQ(KE::begin(expected), std::find_if_not(KE::begin(expected), KE::end(expected), not_equals_zero)); // pass iterators, returns iterator - EXPECT_EQ(KE::begin(view), KE::find_if_not(exespace(), KE::begin(view), + ASSERT_EQ(KE::begin(view), KE::find_if_not(exespace(), KE::begin(view), KE::end(view), not_equals_zero)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view), + ASSERT_EQ(KE::begin(view), KE::find_if_not(exespace(), view, not_equals_zero)); fill_views_inc(view, expected); @@ -124,11 +124,11 @@ void test_find_if_not(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass const iterators, returns const iterator - EXPECT_EQ(KE::cbegin(view) + distance, + ASSERT_EQ(KE::cbegin(view) + distance, KE::find_if_not(exespace(), KE::cbegin(view), KE::cend(view), equals_zero)); // pass view, returns const iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find_if_not(exespace(), view, equals_zero)); } @@ -151,12 +151,6 @@ void run_all_scenarios() { } TEST(std_algorithms_find_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp b/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp index ddc4bc1ba6..5a5359b0b2 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp @@ -282,7 +282,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); // std::cout << "result : " << mydiff << " " << stddiff << std::endl; - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -291,21 +291,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -348,12 +348,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, find_end) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp b/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp index c2f7a2fdb8..d77edb5fed 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp @@ -201,7 +201,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -210,21 +210,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -264,12 +264,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, find_first_of) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp b/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp index 83b44f01aa..793b98a67f 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp @@ -91,23 +91,23 @@ void test_for_each_n(const ViewType view) { const auto non_mod_functor = NoOpNonMutableFunctor(); // pass const iterators, functor takes const ref - EXPECT_EQ(KE::cbegin(view) + n, + ASSERT_EQ(KE::cbegin(view) + n, KE::for_each_n(exespace(), KE::cbegin(view), n, non_mod_functor)); verify_values(value_t{0}, view); // pass view, functor takes const ref - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n(exespace(), view, n, non_mod_functor)); verify_values(value_t{0}, view); // pass iterators, functor takes non-const ref const auto mod_functor = IncrementElementWiseFunctor(); - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n(exespace(), KE::begin(view), n, mod_functor)); verify_values(value_t{1}, view); // pass view, functor takes non-const ref - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n("label", exespace(), view, n, mod_functor)); verify_values(value_t{2}, view); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index 510f1d195a..8e60a43e5f 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -171,7 +171,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(static_cast(gold_h(i) - test_view_h(i))); @@ -224,7 +224,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } @@ -232,21 +232,21 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } @@ -279,7 +279,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -288,14 +288,14 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -303,7 +303,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -353,6 +353,45 @@ TEST(std_algorithms_numeric_ops_test, inclusive_scan) { run_inclusive_scan_all_scenarios(); } +TEST(std_algorithms_numeric_ops_test, inclusive_scan_functor) { + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using functor_type = Kokkos::Experimental::Impl::InclusiveScanDefaultFunctor< + exespace, int, int, view_type, view_type>; + functor_type functor(dummy_view, dummy_view); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 1; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 3); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace IncScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index ce8669a84f..dcfe8ad67e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) { KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view)); auto r3 = KE::is_sorted_until(exespace(), view); auto r4 = KE::is_sorted_until("label", exespace(), view); - EXPECT_EQ(r1, gold); - EXPECT_EQ(r2, gold); - EXPECT_EQ(r3, gold); - EXPECT_EQ(r4, gold); + ASSERT_EQ(r1, gold); + ASSERT_EQ(r2, gold); + ASSERT_EQ(r3, gold); + ASSERT_EQ(r4, gold); #if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator comp; @@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) { auto r8 = KE::is_sorted_until("label", exespace(), view, comp); #endif - EXPECT_EQ(r1, gold); - EXPECT_EQ(r2, gold); - EXPECT_EQ(r3, gold); - EXPECT_EQ(r4, gold); + ASSERT_EQ(r1, gold); + ASSERT_EQ(r2, gold); + ASSERT_EQ(r3, gold); + ASSERT_EQ(r4, gold); Kokkos::fence(); } @@ -185,12 +185,6 @@ void run_is_sorted_until_all_scenarios() { } TEST(std_algorithms_sorting_ops_test, is_sorted_until) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_is_sorted_until_all_scenarios(); run_is_sorted_until_all_scenarios(); run_is_sorted_until_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp b/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp index 2acd4934ac..5d9e7db803 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp @@ -44,16 +44,16 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { std::lexicographical_compare(h_first_1, h_last_1, h_first_2, h_last_2); // pass iterators - EXPECT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1, first_2, last_2)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), first_1, last_1, first_2, last_2)); // pass views - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), view_1, view_2)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), view_1, view_2)); } @@ -67,17 +67,17 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { h_first_1, h_last_1, h_first_2, h_last_2, custom_comparator); // pass iterators - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1, first_2, last_2, custom_comparator)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), first_1, last_1, first_2, last_2, custom_comparator)); // pass views - EXPECT_EQ(std_result, KE::lexicographical_compare( + ASSERT_EQ(std_result, KE::lexicographical_compare( exespace(), view_1, view_2, custom_comparator)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), view_1, view_2, custom_comparator)); } @@ -86,7 +86,7 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { // empty vs non-empty auto std_result = std::lexicographical_compare(h_first_1, h_first_1, h_first_2, h_last_2); - EXPECT_EQ(std_result, KE::lexicographical_compare( + ASSERT_EQ(std_result, KE::lexicographical_compare( exespace(), first_1, first_1, first_2, last_2)); } @@ -95,7 +95,7 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { if (view_1.extent(0) > 1) { auto std_result = std::lexicographical_compare(h_first_1, h_last_1 - 1, h_first_2, h_last_2); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1 - 1, first_2, last_2)); } @@ -140,12 +140,6 @@ void run_all_scenarios() { } TEST(std_algorithms_lexicographical_compare_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif // FIXME: should this disable only custom comparator tests? #if !defined KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp index f8634ffafe..bc43231784 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp @@ -173,7 +173,7 @@ void std_algo_min_max_test_verify(Kokkos::pair goldPair, const ItType result, TestedViewType testedView) { // check that iterator is pointing to right element - EXPECT_EQ(result - KE::begin(testedView), goldPair.first); + ASSERT_EQ(result - KE::begin(testedView), goldPair.first); // create a view for the result to copy into it the iterator's value using result_view_t = Kokkos::View; @@ -184,7 +184,7 @@ void std_algo_min_max_test_verify(Kokkos::pair goldPair, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), resultView); // use the host mirror of the result view to check that the values match - EXPECT_EQ(result_v_h(), goldPair.second); + ASSERT_EQ(result_v_h(), goldPair.second); } template @@ -199,39 +199,39 @@ template void test_max_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::max_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result, KE::cbegin(view)); + ASSERT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::max_element(exespace(), it0, it1); - EXPECT_EQ(result2, it1); + ASSERT_EQ(result2, it1); } template void test_min_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::min_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result, KE::cbegin(view)); + ASSERT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::min_element(exespace(), it0, it1); - EXPECT_EQ(result2, it1); + ASSERT_EQ(result2, it1); } template void test_minmax_element_empty_range(ViewType view) { auto result = KE::minmax_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result.first, KE::cbegin(view)); - EXPECT_EQ(result.second, KE::cbegin(view)); + ASSERT_EQ(result.first, KE::cbegin(view)); + ASSERT_EQ(result.second, KE::cbegin(view)); auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::minmax_element(exespace(), it0, it1); - EXPECT_EQ(result2.first, it1); - EXPECT_EQ(result2.second, it1); + ASSERT_EQ(result2.first, it1); + ASSERT_EQ(result2.second, it1); } template diff --git a/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp b/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp index bb4b6fb2a2..f3b3e269c4 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp @@ -120,10 +120,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - f2; const auto my_diff21 = my_res2.first - f1; const auto my_diff22 = my_res2.second - f2; - EXPECT_EQ(my_diff11, std_diff1); - EXPECT_EQ(my_diff12, std_diff2); - EXPECT_EQ(my_diff21, std_diff1); - EXPECT_EQ(my_diff22, std_diff2); + ASSERT_EQ(my_diff11, std_diff1); + ASSERT_EQ(my_diff12, std_diff2); + ASSERT_EQ(my_diff21, std_diff1); + ASSERT_EQ(my_diff22, std_diff2); } { @@ -134,10 +134,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - KE::begin(view2); const auto my_diff21 = my_res2.first - KE::begin(view1); const auto my_diff22 = my_res2.second - KE::begin(view2); - EXPECT_EQ(my_diff11, std_diff1); - EXPECT_EQ(my_diff12, std_diff2); - EXPECT_EQ(my_diff21, std_diff1); - EXPECT_EQ(my_diff22, std_diff2); + ASSERT_EQ(my_diff11, std_diff1); + ASSERT_EQ(my_diff12, std_diff2); + ASSERT_EQ(my_diff21, std_diff1); + ASSERT_EQ(my_diff22, std_diff2); } } @@ -189,12 +189,6 @@ void run_all_scenarios() { } TEST(std_algorithms_mismatch_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 4fce044bcf..4604764097 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ -52,14 +52,14 @@ TEST(std_algorithms_mod_ops_test, move) { // move constr MyMovableType b(std::move(a)); - EXPECT_EQ(b.m_value, 11); - EXPECT_EQ(a.m_value, -2); + ASSERT_EQ(b.m_value, 11); + ASSERT_EQ(a.m_value, -2); // move assign MyMovableType c; c = std::move(b); - EXPECT_EQ(c.m_value, 11); - EXPECT_EQ(b.m_value, -4); + ASSERT_EQ(c.m_value, 11); + ASSERT_EQ(b.m_value, -4); } template @@ -97,8 +97,8 @@ TEST(std_algorithms_mod_ops_test, swap) { int a = 1; int b = 2; KE::swap(a, b); - EXPECT_EQ(a, 2); - EXPECT_EQ(b, 1); + ASSERT_EQ(a, 2); + ASSERT_EQ(b, 1); } { @@ -151,17 +151,17 @@ void test_iter_swap(ViewType view) { using value_type = typename ViewType::value_type; auto a_dc = create_deep_copyable_compatible_clone(view); auto a_h = create_mirror_view_and_copy(Kokkos::HostSpace(), a_dc); - EXPECT_EQ(view.extent_int(0), 10); - EXPECT_EQ(a_h(0), value_type(3)); - EXPECT_EQ(a_h(1), value_type(1)); - EXPECT_EQ(a_h(2), value_type(2)); - EXPECT_EQ(a_h(3), value_type(0)); - EXPECT_EQ(a_h(4), value_type(6)); - EXPECT_EQ(a_h(5), value_type(5)); - EXPECT_EQ(a_h(6), value_type(4)); - EXPECT_EQ(a_h(7), value_type(7)); - EXPECT_EQ(a_h(8), value_type(8)); - EXPECT_EQ(a_h(9), value_type(9)); + ASSERT_EQ(view.extent_int(0), 10); + ASSERT_EQ(a_h(0), value_type(3)); + ASSERT_EQ(a_h(1), value_type(1)); + ASSERT_EQ(a_h(2), value_type(2)); + ASSERT_EQ(a_h(3), value_type(0)); + ASSERT_EQ(a_h(4), value_type(6)); + ASSERT_EQ(a_h(5), value_type(5)); + ASSERT_EQ(a_h(6), value_type(4)); + ASSERT_EQ(a_h(7), value_type(7)); + ASSERT_EQ(a_h(8), value_type(8)); + ASSERT_EQ(a_h(9), value_type(9)); } TEST(std_algorithms_mod_ops_test, iter_swap_static_view) { diff --git a/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp index 6b806d7bc5..f80f30797e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp @@ -34,21 +34,21 @@ struct std_algorithms_mod_seq_ops_test : std_algorithms_test { TEST_F(std_algorithms_mod_seq_ops_test, copy) { auto result = KE::copy(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_strided_view)); - EXPECT_EQ(KE::end(m_strided_view), result); + ASSERT_EQ(KE::end(m_strided_view), result); compare_views(m_static_view, m_strided_view); auto result2 = KE::copy(exespace(), KE::begin(m_strided_view), KE::end(m_strided_view), KE::begin(m_dynamic_view)); - EXPECT_EQ(KE::end(m_dynamic_view), result2); + ASSERT_EQ(KE::end(m_dynamic_view), result2); compare_views(m_dynamic_view, m_strided_view); } TEST_F(std_algorithms_mod_seq_ops_test, copy_view) { - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::copy(exespace(), m_static_view, m_dynamic_view)); compare_views(m_static_view, m_dynamic_view); - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::copy(exespace(), m_dynamic_view, m_strided_view)); compare_views(m_dynamic_view, m_strided_view); } @@ -70,11 +70,11 @@ TEST_F(std_algorithms_mod_seq_ops_test, copy_n) { // pass iterators auto first = KE::begin(m_static_view); auto dest = KE::begin(m_dynamic_view); - EXPECT_EQ(dest + n, KE::copy_n(exespace(), first, n, dest)); + ASSERT_EQ(dest + n, KE::copy_n(exespace(), first, n, dest)); compare_views(expected, m_dynamic_view); // pass views - EXPECT_EQ(KE::begin(m_strided_view) + n, + ASSERT_EQ(KE::begin(m_strided_view) + n, KE::copy_n(exespace(), m_static_view, n, m_strided_view)); compare_views(expected, m_strided_view); } @@ -85,12 +85,12 @@ TEST_F(std_algorithms_mod_seq_ops_test, copy_backward) { auto dest = KE::end(m_dynamic_view); // pass iterators - EXPECT_EQ(KE::begin(m_dynamic_view), + ASSERT_EQ(KE::begin(m_dynamic_view), KE::copy_backward(exespace(), first, last, dest)); compare_views(m_static_view, m_dynamic_view); // pass views - EXPECT_EQ(KE::begin(m_strided_view), + ASSERT_EQ(KE::begin(m_strided_view), KE::copy_backward(exespace(), m_static_view, m_strided_view)); compare_views(m_static_view, m_strided_view); } @@ -112,11 +112,11 @@ TEST_F(std_algorithms_mod_seq_ops_test, reverse_copy) { auto last = KE::end(m_static_view); auto dest = KE::begin(m_dynamic_view); - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::reverse_copy(exespace(), first, last, dest)); compare_views(expected, m_dynamic_view); - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::reverse_copy(exespace(), m_static_view, m_strided_view)); compare_views(expected, m_strided_view); } @@ -151,25 +151,25 @@ TEST_F(std_algorithms_mod_seq_ops_test, fill_n) { // fill all elements // pass iterator - EXPECT_EQ(KE::end(m_static_view), + ASSERT_EQ(KE::end(m_static_view), KE::fill_n(exespace(), KE::begin(m_static_view), m_static_view.extent(0), fill_n_value)); verify_values(fill_n_value, m_static_view); // pass view - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::fill_n(exespace(), m_strided_view, m_strided_view.extent(0), fill_n_value)); verify_values(fill_n_value, m_strided_view); // fill zero elements // pass view - EXPECT_EQ(KE::begin(m_dynamic_view), + ASSERT_EQ(KE::begin(m_dynamic_view), KE::fill_n(exespace(), m_dynamic_view, 0, fill_n_new_value)); // fill single element // pass iterator - EXPECT_EQ( + ASSERT_EQ( KE::begin(m_static_view) + 1, KE::fill_n(exespace(), KE::begin(m_static_view), 1, fill_n_new_value)); @@ -212,21 +212,21 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_unary_op) { auto r1 = KE::transform(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_dynamic_view), TransformFunctor()); - EXPECT_EQ(r1, KE::end(m_dynamic_view)); + ASSERT_EQ(r1, KE::end(m_dynamic_view)); compare_views(gold_source, m_static_view); verify_values(-1., m_dynamic_view); // transform dynamic view, store results in strided view auto r2 = KE::transform(exespace(), m_dynamic_view, m_strided_view, TransformFunctor()); - EXPECT_EQ(r2, KE::end(m_strided_view)); + ASSERT_EQ(r2, KE::end(m_strided_view)); verify_values(-1., m_dynamic_view); verify_values(-1., m_strided_view); // transform strided view, store results in static view auto r3 = KE::transform(exespace(), m_strided_view, m_static_view, TransformFunctor()); - EXPECT_EQ(r3, KE::end(m_static_view)); + ASSERT_EQ(r3, KE::end(m_static_view)); verify_values(-1., m_static_view); verify_values(-1., m_strided_view); } @@ -254,7 +254,7 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_binary_op) { auto r1 = KE::transform(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_dynamic_view), KE::begin(m_strided_view), TransformBinaryFunctor()); - EXPECT_EQ(r1, KE::end(m_strided_view)); + ASSERT_EQ(r1, KE::end(m_strided_view)); compare_views(expected, m_strided_view); expected(0) = 0; @@ -269,7 +269,7 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_binary_op) { expected(9) = 18; auto r2 = KE::transform("label", exespace(), m_static_view, m_strided_view, m_dynamic_view, TransformBinaryFunctor()); - EXPECT_EQ(r2, KE::end(m_dynamic_view)); + ASSERT_EQ(r2, KE::end(m_dynamic_view)); compare_views(expected, m_dynamic_view); } @@ -296,19 +296,19 @@ TEST_F(std_algorithms_mod_seq_ops_test, generate) { TEST_F(std_algorithms_mod_seq_ops_test, generate_n) { // iterator + functor - EXPECT_EQ(KE::end(m_static_view), + ASSERT_EQ(KE::end(m_static_view), KE::generate_n(exespace(), KE::begin(m_static_view), m_static_view.extent(0), GenerateFunctor())); verify_values(generated_value, m_static_view); // view + functor - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::generate_n(exespace(), m_dynamic_view, m_dynamic_view.extent(0), GenerateFunctor())); verify_values(generated_value, m_dynamic_view); // view + functor, negative n - EXPECT_EQ(KE::begin(m_strided_view), + ASSERT_EQ(KE::begin(m_strided_view), KE::generate_n(exespace(), m_strided_view, -1, GenerateFunctor())); } @@ -352,7 +352,7 @@ void test_swap_ranges(ViewType view) { auto last1 = first1 + 4; auto first2 = KE::begin(viewB) + 1; auto r = KE::swap_ranges(exespace(), first1, last1, first2); - EXPECT_EQ(r, first2 + 4); + ASSERT_EQ(r, first2 + 4); /* check VIEW_A */ static_view_type checkViewA("tmp"); @@ -360,16 +360,16 @@ void test_swap_ranges(ViewType view) { parallel_for(ext, cp_func_a_t(view, checkViewA)); auto cvA_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewA); - EXPECT_EQ(cvA_h(0), 0); - EXPECT_EQ(cvA_h(1), 1); - EXPECT_EQ(cvA_h(2), 99); - EXPECT_EQ(cvA_h(3), 98); - EXPECT_EQ(cvA_h(4), 97); - EXPECT_EQ(cvA_h(5), 96); - EXPECT_EQ(cvA_h(6), 6); - EXPECT_EQ(cvA_h(7), 7); - EXPECT_EQ(cvA_h(8), 8); - EXPECT_EQ(cvA_h(9), 9); + ASSERT_EQ(cvA_h(0), 0); + ASSERT_EQ(cvA_h(1), 1); + ASSERT_EQ(cvA_h(2), 99); + ASSERT_EQ(cvA_h(3), 98); + ASSERT_EQ(cvA_h(4), 97); + ASSERT_EQ(cvA_h(5), 96); + ASSERT_EQ(cvA_h(6), 6); + ASSERT_EQ(cvA_h(7), 7); + ASSERT_EQ(cvA_h(8), 8); + ASSERT_EQ(cvA_h(9), 9); /* check viewB */ static_view_type checkViewB("tmpB"); @@ -377,16 +377,16 @@ void test_swap_ranges(ViewType view) { Kokkos::parallel_for(ext, cp_func_b_t(viewB, checkViewB)); auto cvB_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewB); - EXPECT_EQ(cvB_h(0), 100); - EXPECT_EQ(cvB_h(1), 2); - EXPECT_EQ(cvB_h(2), 3); - EXPECT_EQ(cvB_h(3), 4); - EXPECT_EQ(cvB_h(4), 5); - EXPECT_EQ(cvB_h(5), 95); - EXPECT_EQ(cvB_h(6), 94); - EXPECT_EQ(cvB_h(7), 93); - EXPECT_EQ(cvB_h(8), 92); - EXPECT_EQ(cvB_h(9), 91); + ASSERT_EQ(cvB_h(0), 100); + ASSERT_EQ(cvB_h(1), 2); + ASSERT_EQ(cvB_h(2), 3); + ASSERT_EQ(cvB_h(3), 4); + ASSERT_EQ(cvB_h(4), 5); + ASSERT_EQ(cvB_h(5), 95); + ASSERT_EQ(cvB_h(6), 94); + ASSERT_EQ(cvB_h(7), 93); + ASSERT_EQ(cvB_h(8), 92); + ASSERT_EQ(cvB_h(9), 91); } TEST_F(std_algorithms_mod_seq_ops_test, swap_ranges) { diff --git a/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp b/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp index 635714eb54..b201ab95c1 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp @@ -53,20 +53,20 @@ void run_single_scenario(const InfoType& scenario_info, int apiId) { auto rit = KE::move_backward(exespace(), KE::begin(v), KE::end(v), KE::end(v2)); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 1) { auto rit = KE::move_backward("mylabel", exespace(), KE::begin(v), KE::end(v), KE::end(v2)); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 2) { auto rit = KE::move_backward(exespace(), v, v2); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 3) { auto rit = KE::move_backward("mylabel", exespace(), v, v2); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } // check diff --git a/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index 288a67c369..0933c4e135 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -151,8 +151,8 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r2 = KE::transform_reduce( "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), KE::cbegin(second_view), init_value); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non-trivial cases const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(first_view), @@ -168,10 +168,10 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -254,8 +254,8 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::cbegin(first_view), KE::cbegin(second_view), init_value, std::forward(args)...); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::transform_reduce( @@ -273,10 +273,10 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value, std::forward(args)...); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -373,8 +373,8 @@ void run_and_check_transform_reduce_overloadB(ViewType view, KE::cbegin(view), KE::cbegin(view), init_value, std::forward(args)...); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial const auto r3 = @@ -390,10 +390,10 @@ void run_and_check_transform_reduce_overloadB(ViewType view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, init_value, std::forward(args)...); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -447,8 +447,8 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(r1, trivial_result); - EXPECT_EQ(r2, trivial_result); + ASSERT_EQ(r1, trivial_result); + ASSERT_EQ(r2, trivial_result); // non trivial cases const auto r3 = @@ -458,10 +458,10 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, const auto r5 = KE::reduce(ExecutionSpace(), view); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view); - EXPECT_EQ(r3, non_trivial_result); - EXPECT_EQ(r4, non_trivial_result); - EXPECT_EQ(r5, non_trivial_result); - EXPECT_EQ(r6, non_trivial_result); + ASSERT_EQ(r3, non_trivial_result); + ASSERT_EQ(r4, non_trivial_result); + ASSERT_EQ(r5, non_trivial_result); + ASSERT_EQ(r6, non_trivial_result); } TEST_F(std_algorithms_numerics_test, @@ -503,8 +503,8 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType result_value, KE::cbegin(view), init_value); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -514,10 +514,10 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType result_value, const auto r5 = KE::reduce(ExecutionSpace(), view, init_value); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -553,8 +553,8 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, KE::cbegin(view), init_value, joiner); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value, joiner); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -565,10 +565,10 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value, joiner); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, diff --git a/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp index 0399e9eee4..f169fd9ce8 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp @@ -130,12 +130,12 @@ void verify_data(const std::string& name, ResultType my_result, const std::size_t my_diff_true = my_result.first - KE::begin(view_dest_true); const std::size_t my_diff_false = my_result.second - KE::begin(view_dest_false); - EXPECT_EQ(std_diff_true, my_diff_true); - EXPECT_EQ(std_diff_false, my_diff_false); + ASSERT_EQ(std_diff_true, my_diff_true); + ASSERT_EQ(std_diff_false, my_diff_false); auto view_dest_true_h = create_host_space_copy(view_dest_true); for (std::size_t i = 0; i < std_diff_true; ++i) { - EXPECT_EQ(std_vec_true[i], view_dest_true_h(i)); + ASSERT_EQ(std_vec_true[i], view_dest_true_h(i)); // std::cout << "i= " << i << " " // << " std_true = " << std_vec_true[i] << " " // << " mine = " << view_dest_true_h(i) << '\n'; @@ -143,45 +143,45 @@ void verify_data(const std::string& name, ResultType my_result, auto view_dest_false_h = create_host_space_copy(view_dest_false); for (std::size_t i = 0; i < std_diff_false; ++i) { - EXPECT_EQ(std_vec_false[i], view_dest_false_h(i)); + ASSERT_EQ(std_vec_false[i], view_dest_false_h(i)); // std::cout << "i= " << i << " " // << " std_false = " << std_vec_false[i] << " " // << " mine = " << view_dest_false_h(i) << '\n'; } if (name == "empty") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "one-element-a") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "one-element-b") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "two-elements-a") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "two-elements-b") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "small-b") { - EXPECT_EQ(my_diff_true, 13u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 13u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "small-c") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 15u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 15u); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp index 1bfb536c2c..33a1326c47 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp @@ -148,12 +148,6 @@ struct std_algorithms_partitioning_test : public std_algorithms_test { }; TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif IsNegativeFunctor p; const auto result1 = KE::is_partitioned(exespace(), KE::cbegin(m_static_view), KE::cbegin(m_static_view), p); @@ -169,12 +163,6 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) { } TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -183,25 +171,19 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) { goldSolutionIsPartitioned(static_cast(id)); const auto result1 = KE::is_partitioned( exespace(), KE::cbegin(m_static_view), KE::cend(m_static_view), p); - EXPECT_EQ(goldBool, result1); + ASSERT_EQ(goldBool, result1); const auto result2 = KE::is_partitioned( exespace(), KE::cbegin(m_dynamic_view), KE::cend(m_dynamic_view), p); - EXPECT_EQ(goldBool, result2); + ASSERT_EQ(goldBool, result2); const auto result3 = KE::is_partitioned( exespace(), KE::cbegin(m_strided_view), KE::cend(m_strided_view), p); - EXPECT_EQ(goldBool, result3); + ASSERT_EQ(goldBool, result3); } } TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_view) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -209,23 +191,17 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_view) { const bool goldBool = goldSolutionIsPartitioned(static_cast(id)); const auto result1 = KE::is_partitioned(exespace(), m_static_view, p); - EXPECT_EQ(goldBool, result1); + ASSERT_EQ(goldBool, result1); const auto result2 = KE::is_partitioned(exespace(), m_dynamic_view, p); - EXPECT_EQ(goldBool, result2); + ASSERT_EQ(goldBool, result2); const auto result3 = KE::is_partitioned(exespace(), m_strided_view, p); - EXPECT_EQ(goldBool, result3); + ASSERT_EQ(goldBool, result3); } } TEST_F(std_algorithms_partitioning_test, partition_point) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -235,17 +211,17 @@ TEST_F(std_algorithms_partitioning_test, partition_point) { auto first1 = KE::cbegin(m_static_view); auto last1 = KE::cend(m_static_view); const auto result1 = KE::partition_point(exespace(), first1, last1, p); - EXPECT_EQ(goldIndex, result1 - first1); + ASSERT_EQ(goldIndex, result1 - first1); auto first2 = KE::cbegin(m_dynamic_view); auto last2 = KE::cend(m_dynamic_view); const auto result2 = KE::partition_point(exespace(), first2, last2, p); - EXPECT_EQ(goldIndex, result2 - first2); + ASSERT_EQ(goldIndex, result2 - first2); auto first3 = KE::cbegin(m_strided_view); auto last3 = KE::cend(m_strided_view); const auto result3 = KE::partition_point(exespace(), first3, last3, p); - EXPECT_EQ(goldIndex, result3 - first3); + ASSERT_EQ(goldIndex, result3 - first3); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp index 8832d71f95..c35fc5c24b 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp @@ -117,12 +117,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_test_h(i), view_data_h[i]); + ASSERT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp index 949f8f60c9..3d7c52108b 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp @@ -135,12 +135,12 @@ void verify_data(ViewFromType view_from, ViewDestType view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); + ASSERT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp index 9dc1e4a7e1..cb699aa923 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp @@ -119,12 +119,12 @@ void verify_data(ViewTypeFrom view_from, ViewTypeDest view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); + ASSERT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp index e9d15f29d8..f06f2234ee 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp @@ -112,12 +112,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_test_h(i), view_data_h[i]); + ASSERT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp index b226de5535..a22ab32d76 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp @@ -104,30 +104,30 @@ void verify_data(const std::string& name, ViewType1 test_view, } else if (name == "one-element-a") { - EXPECT_EQ(view_h(0), ValueType{1}); + ASSERT_EQ(view_h(0), ValueType{1}); } else if (name == "one-element-b") { - EXPECT_EQ(view_h(0), new_value); + ASSERT_EQ(view_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_h(0), ValueType{1}); - EXPECT_EQ(view_h(1), new_value); + ASSERT_EQ(view_h(0), ValueType{1}); + ASSERT_EQ(view_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_h(0), new_value); - EXPECT_EQ(view_h(1), ValueType{-1}); + ASSERT_EQ(view_h(0), new_value); + ASSERT_EQ(view_h(1), ValueType{-1}); } else if (name == "small-a") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_h(i), gold); + ASSERT_EQ(view_h(i), gold); } } } @@ -135,9 +135,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "small-b") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_h(i), ValueType{-1}); + ASSERT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } } } @@ -145,9 +145,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_h(i), ValueType{-1}); + ASSERT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp index 16b181fdd2..a964ec8e17 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp @@ -112,40 +112,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_from_h(1), ValueType{2}); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_from_h(1), ValueType{2}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); - EXPECT_EQ(view_test_h(1), new_value); + ASSERT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_from_h(1), ValueType{-1}); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_EQ(view_test_h(0), new_value); - EXPECT_EQ(view_test_h(1), view_from_h(1)); + ASSERT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_from_h(i), gold); - EXPECT_EQ(view_test_h(i), gold); + ASSERT_EQ(view_from_h(i), gold); + ASSERT_EQ(view_test_h(i), gold); } } } @@ -153,11 +153,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -165,11 +165,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -202,7 +202,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::replace_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -215,7 +215,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -227,7 +227,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy(exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -239,7 +239,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy("label", exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp index a402e30ad9..ceeba88971 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp @@ -112,40 +112,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_from_h(1), ValueType{2}); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_from_h(1), ValueType{2}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); - EXPECT_EQ(view_test_h(1), new_value); + ASSERT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_from_h(1), ValueType{-1}); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_EQ(view_test_h(0), new_value); - EXPECT_EQ(view_test_h(1), view_from_h(1)); + ASSERT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_from_h(i), gold); - EXPECT_EQ(view_test_h(i), gold); + ASSERT_EQ(view_from_h(i), gold); + ASSERT_EQ(view_test_h(i), gold); } } } @@ -153,11 +153,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -165,11 +165,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -209,7 +209,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -220,7 +220,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -230,7 +230,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if(exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -240,7 +240,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if("label", exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp index f481144e1c..802c0093c5 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp @@ -138,7 +138,7 @@ void verify_data(ViewType1 data_view, // contains data // << data_view_dc(i) << " " // << data_view_h(i) << " " // << test_view_h(i) << std::endl; - EXPECT_EQ(data_view_h(i), test_view_h(i)); + ASSERT_EQ(data_view_h(i), test_view_h(i)); } } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp b/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp index 7d16e54029..6e6ca72783 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp @@ -77,7 +77,7 @@ void verify_data(ViewType1 test_view, ViewType2 orig_view) { const std::size_t ext = test_view.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(tv_h(i), ov_h(ext - i - 1)); + ASSERT_EQ(tv_h(i), ov_h(ext - i - 1)); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index a5a6f99bac..5638cbee4a 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -136,13 +136,13 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); const std::size_t ext = view_h.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(view_h(i), data_view_host[i]); + ASSERT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp index 27451a1d04..d0caca7cea 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp @@ -139,7 +139,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test, std_gold_h.begin()); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(view_test_h(i), std_gold_h[i]); + ASSERT_EQ(view_test_h(i), std_gold_h[i]); // std::cout << "i= " << i << " " // << "from: " << view_from_h(i) << " " // << "mine: " << view_test_h(i) << " " @@ -177,7 +177,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy(exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -187,7 +187,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy(exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -205,7 +205,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp b/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp index ab4bf50713..021609c444 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp @@ -259,7 +259,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -268,21 +268,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -325,12 +325,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, search) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp b/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp index a6fe9c1e89..53ad8daa2e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp @@ -203,26 +203,26 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t count, auto myrit = KE::search_n(exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n(exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -297,12 +297,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, search_n) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp b/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp index 8e4ced9635..0b5fe9216e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp @@ -103,12 +103,12 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_EQ(view_h(i), data_view_host[i]); + ASSERT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp b/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp index a1614be027..8e4ae94375 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp @@ -101,14 +101,14 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = KE::end(view) - result_it; const auto std_diff = KE::end(data_view_host) - std_rit; - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); auto it1 = KE::cbegin(view_h); auto it2 = KE::cbegin(data_view_host); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_EQ(it1[i], it2[i]); + ASSERT_EQ(it1[i], it2[i]); // std::cout << "i= " << i << " " // << "mine: " << it1[i] << " " // << "std: " << it2[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index 70c04dbafa..75525b3b0f 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -165,7 +165,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -221,7 +221,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -230,7 +230,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -238,7 +238,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan(exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -246,7 +246,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -279,6 +279,59 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan) { } #endif +template +struct MultiplyFunctor { + KOKKOS_INLINE_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a * b); + } +}; + +TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) { + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using unary_op_type = + Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor< + int>; + using functor_type = + Kokkos::Experimental::Impl::TransformExclusiveScanFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy, dummy_view, dummy_view, {}, {}); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 3; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 6); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace TransformEScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp index 80ff813251..5d122ac5e8 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -177,7 +177,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -246,7 +246,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -255,7 +255,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -263,7 +263,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan(exespace(), view_from, view_dest, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -271,7 +271,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan("label", exespace(), view_from, view_dest, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -306,6 +306,73 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) { } #endif +template +struct MultiplyFunctor { + KOKKOS_INLINE_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a * b); + } +}; + +TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) { + using value_type = KE::Impl::ValueWrapperForNoNeutralElement; + + auto test_lambda = [&](auto& functor) { + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 3; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 6); + ASSERT_EQ(value2.is_initial, false); + }; + + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using unary_op_type = + KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; + { + using functor_type = KE::Impl::TransformInclusiveScanNoInitValueFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy_view, dummy_view, {}, {}); + + test_lambda(functor); + } + + { + using functor_type = KE::Impl::TransformInclusiveScanWithInitValueFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy_view, dummy_view, {}, {}, dummy); + + test_lambda(functor); + } +} + } // namespace TransformIncScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp index dab81b8f1e..6070c1a60d 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp @@ -58,7 +58,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test) { create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc); for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { - EXPECT_EQ(view_test_h(i), view_from_h(i) + value_type(1)); + ASSERT_EQ(view_test_h(i), view_from_h(i) + value_type(1)); } } @@ -89,7 +89,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto r1 = KE::transform(exespace(), KE::begin(view_from), KE::end(view_from), KE::begin(view_dest), unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -98,7 +98,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto r1 = KE::transform("label", exespace(), KE::begin(view_from), KE::end(view_from), KE::begin(view_dest), unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -106,7 +106,7 @@ void run_single_scenario(const InfoType& scenario_info) { create_view(Tag{}, view_ext, "transform_uop_dest"); auto r1 = KE::transform(exespace(), view_from, view_dest, unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -114,7 +114,7 @@ void run_single_scenario(const InfoType& scenario_info) { create_view(Tag{}, view_ext, "transform_uop_dest"); auto r1 = KE::transform("label", exespace(), view_from, view_dest, unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp b/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp index a810d31d82..9c5ae0cf8a 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp @@ -157,7 +157,7 @@ void verify_data(const std::string& name, ResultIt my_result_it, // const auto std_diff = (std::size_t)(std_r - KE::begin(data_v_h)); const auto my_diff = (std::size_t)(my_result_it - KE::begin(view_test)); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // // check the data in the view @@ -170,14 +170,14 @@ void verify_data(const std::string& name, ResultIt my_result_it, // << " my = " << view_test_h(i) << " " // << " std = " << data_v_h(i) // << '\n'; - EXPECT_EQ(view_test_h(i), data_v_h(i)); + ASSERT_EQ(view_test_h(i), data_v_h(i)); } if (name == "medium-b") { using value_type = typename ViewType1::value_type; - EXPECT_EQ(my_diff, (std::size_t)2); - EXPECT_EQ(view_test_h(0), (value_type)22); - EXPECT_EQ(view_test_h(1), (value_type)44); + ASSERT_EQ(my_diff, (std::size_t)2); + ASSERT_EQ(view_test_h(0), (value_type)22); + ASSERT_EQ(view_test_h(1), (value_type)44); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp index f609d8517e..3cf43ad4db 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp @@ -174,51 +174,51 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(0), static_cast(1)); } else if (name == "one-element-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(2)); } else if (name == "two-elements-a") { - EXPECT_EQ(view_test_h(0), static_cast(1)); - EXPECT_EQ(view_test_h(1), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(1), static_cast(2)); } else if (name == "two-elements-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(-1)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(-1)); } else if (name == "small-a") { - EXPECT_EQ(view_test_h(0), static_cast(0)); - EXPECT_EQ(view_test_h(1), static_cast(1)); - EXPECT_EQ(view_test_h(2), static_cast(2)); - EXPECT_EQ(view_test_h(3), static_cast(3)); - EXPECT_EQ(view_test_h(4), static_cast(4)); - EXPECT_EQ(view_test_h(5), static_cast(5)); - EXPECT_EQ(view_test_h(6), static_cast(6)); - EXPECT_EQ(view_test_h(7), static_cast(0)); - EXPECT_EQ(view_test_h(8), static_cast(0)); - EXPECT_EQ(view_test_h(9), static_cast(0)); - EXPECT_EQ(view_test_h(10), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(0)); + ASSERT_EQ(view_test_h(1), static_cast(1)); + ASSERT_EQ(view_test_h(2), static_cast(2)); + ASSERT_EQ(view_test_h(3), static_cast(3)); + ASSERT_EQ(view_test_h(4), static_cast(4)); + ASSERT_EQ(view_test_h(5), static_cast(5)); + ASSERT_EQ(view_test_h(6), static_cast(6)); + ASSERT_EQ(view_test_h(7), static_cast(0)); + ASSERT_EQ(view_test_h(8), static_cast(0)); + ASSERT_EQ(view_test_h(9), static_cast(0)); + ASSERT_EQ(view_test_h(10), static_cast(0)); } else if (name == "small-b") { - EXPECT_EQ(view_test_h(0), static_cast(1)); - EXPECT_EQ(view_test_h(1), static_cast(2)); - EXPECT_EQ(view_test_h(2), static_cast(3)); - EXPECT_EQ(view_test_h(3), static_cast(4)); - EXPECT_EQ(view_test_h(4), static_cast(5)); - EXPECT_EQ(view_test_h(5), static_cast(6)); - EXPECT_EQ(view_test_h(6), static_cast(8)); - EXPECT_EQ(view_test_h(7), static_cast(9)); - EXPECT_EQ(view_test_h(8), static_cast(8)); - EXPECT_EQ(view_test_h(9), static_cast(0)); - EXPECT_EQ(view_test_h(10), static_cast(0)); - EXPECT_EQ(view_test_h(11), static_cast(0)); - EXPECT_EQ(view_test_h(12), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(1), static_cast(2)); + ASSERT_EQ(view_test_h(2), static_cast(3)); + ASSERT_EQ(view_test_h(3), static_cast(4)); + ASSERT_EQ(view_test_h(4), static_cast(5)); + ASSERT_EQ(view_test_h(5), static_cast(6)); + ASSERT_EQ(view_test_h(6), static_cast(8)); + ASSERT_EQ(view_test_h(7), static_cast(9)); + ASSERT_EQ(view_test_h(8), static_cast(8)); + ASSERT_EQ(view_test_h(9), static_cast(0)); + ASSERT_EQ(view_test_h(10), static_cast(0)); + ASSERT_EQ(view_test_h(11), static_cast(0)); + ASSERT_EQ(view_test_h(12), static_cast(0)); } else if (name == "medium" || name == "large") { @@ -230,7 +230,7 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, (void)std_r; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { - EXPECT_EQ(view_test_h(i), tmp[i]); + ASSERT_EQ(view_test_h(i), tmp[i]); } } @@ -273,7 +273,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -283,7 +283,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -291,7 +291,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { create_view(Tag{}, view_ext, "unique_copy_dest"); auto rit = KE::unique_copy(exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -300,7 +300,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { auto rit = KE::unique_copy("label", exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/bin/hpcbind b/bin/hpcbind index cb2af2c4b5..b6db270128 100755 --- a/bin/hpcbind +++ b/bin/hpcbind @@ -36,8 +36,14 @@ fi ################################################################################ declare -i HPCBIND_HAS_NVIDIA=0 type nvidia-smi >/dev/null 2>&1 -HPCBIND_HAS_NVIDIA=$((!$?)) +HPCBIND_HAS_NVIDIA=$((! $?)) +################################################################################ +# Check if rocm-smi exist +################################################################################ +declare -i HPCBIND_HAS_AMD=0 +type rocm-smi >/dev/null 2>&1 +HPCBIND_HAS_AMD=$((! $?)) ################################################################################ # Get visible gpu @@ -45,11 +51,30 @@ HPCBIND_HAS_NVIDIA=$((!$?)) declare -i NUM_GPUS=0 HPCBIND_VISIBLE_GPUS="" if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then - NUM_GPUS=$(nvidia-smi -L | wc -l); - HPCBIND_HAS_NVIDIA=$((!$?)) + nvidia-smi >/dev/null 2>&1 + HPCBIND_HAS_NVIDIA=$((! $?)) if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then - GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" - HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + NUM_GPUS=$(nvidia-smi -L | wc -l); + HPCBIND_HAS_NVIDIA=$((! $?)) + if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + fi + fi +fi + +if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + # rocm-smi doesn't have an error code if there is no hardware + # check for /sys/module/amdgpu/initstate instead + stat /sys/module/amdgpu/initstate >/dev/null 2>&1 + HPCBIND_HAS_AMD=$((! $?)) + if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + NUM_GPUS=$(rocm-smi -i --csv | sed '/^$/d' | tail -n +2 | wc -l); + HPCBIND_HAS_AMD=$((! $?)) + if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${ROCR_VISIBLE_DEVICES:-${GPU_LIST}} + fi fi fi @@ -80,7 +105,7 @@ elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then HPCBIND_QUEUE_NAME="mvapich2" HPCBIND_QUEUE_RANK=${MV2_COMM_WORLD_RANK} HPCBIND_QUEUE_SIZE=${MV2_COMM_WORLD_SIZE} -elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then +elif [[ ! -z "${SLURM_LOCALID}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="slurm" HPCBIND_QUEUE_RANK=${SLURM_PROCID} @@ -101,8 +126,8 @@ fi function show_help { local cmd=$(basename "$0") echo "Usage: ${cmd} -- command ..." - echo " Set the process mask, OMP environment variables and CUDA environment" - echo " variables to sane values if possible. Uses hwloc and nvidia-smi if" + echo " Set the process mask, OMP environment variables and CUDA/ROCm environment" + echo " variables to sane values if possible. Uses hwloc and nvidia-smi/rocm-smi if" echo " available. Will preserve the current process binding, so it is safe" echo " to use with a queuing system or mpiexec." echo "" @@ -116,10 +141,10 @@ function show_help { echo " --distribute-partition=I" echo " Use the i'th partition (zero based)" echo " --visible-gpus= Comma separated list of gpu ids" - echo " Default: CUDA_VISIBLE_DEVICES or all gpus in" + echo " Default: CUDA_VISIBLE_DEVICES/ROCR_VISIBLE_DEVICES or all gpus in" echo " sequential order" echo " --ignore-queue Ignore queue job id when choosing visible GPU and partition" - echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES" + echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES/ROCR_VISIBLE_DEVICES" echo " --openmp=M.m Set env variables for the given OpenMP version" echo " Default: 4.0" echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP" @@ -525,13 +550,24 @@ fi ################################################################################ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then - if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then - declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" - else - declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) - declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then + declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + else + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + fi + elif [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then + declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) + export ROCR_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + else + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) + export ROCR_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + fi fi fi @@ -541,6 +577,7 @@ fi export HPCBIND_HWLOC_VERSION=${HPCBIND_HWLOC_VERSION} export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC} export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA} +export HPCBIND_HAS_AMD=${HPCBIND_HAS_AMD} export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS} export HPCBIND_NUM_CORES=${HPCBIND_NUM_CORES} export HPCBIND_NUM_NUMAS=${HPCBIND_NUM_NUMAS} @@ -555,8 +592,14 @@ else export HPCBIND_HWLOC_PARENT_CPUSET="${HPCBIND_HWLOC_PARENT_CPUSET}" fi export HPCBIND_HWLOC_PROC_BIND="${HPCBIND_PROC_BIND}" -export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} -export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} + export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +fi +if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + export HPCBIND_AMD_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} + export HPCBIND_AMD_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +fi export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}" if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then export HPCBIND_QUEUE_RANK=${HPCBIND_QUEUE_RANK} @@ -580,6 +623,9 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then echo "${TMP_ENV}" | grep -E "^HWLOC_" >> ${HPCBIND_LOG} echo "[CUDA]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG} + echo "[ROCM]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^ROCM_" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^ROCR_" >> ${HPCBIND_LOG} echo "[OPENMP]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG} echo "[GOMP] (gcc, g++, and gfortran)" >> ${HPCBIND_LOG} @@ -602,6 +648,9 @@ else echo "${TMP_ENV}" | grep -E "^HWLOC_" > >(tee -a ${HPCBIND_LOG}) echo "[CUDA]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG}) + echo "[ROCM]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^ROCM_" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^ROCR_" > >(tee -a ${HPCBIND_LOG}) echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG}) echo "[GOMP] (gcc, g++, and gfortran)" > >(tee -a ${HPCBIND_LOG}) diff --git a/bin/nvcc_wrapper b/bin/nvcc_wrapper index 2204514d1b..1397148141 100755 --- a/bin/nvcc_wrapper +++ b/bin/nvcc_wrapper @@ -338,6 +338,24 @@ do std_flag=$corrected_std_flag shared_args="$shared_args $std_flag" ;; + --std=c++20|-std=c++20) + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + # NVCC only has C++20 from version 12 on + cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) + if [ ${cuda_main_version} -lt 12 ]; then + fallback_std_flag="-std=c++14" + # this is hopefully just occurring in a downstream project during CMake feature tests + # we really have no choice here but to accept the flag and change to an accepted C++ standard + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + std_flag=$fallback_std_flag + else + std_flag=$1 + fi + shared_args="$shared_args $std_flag" + ;; --std=c++17|-std=c++17) if [ -n "$std_flag" ]; then warn_std_flag @@ -389,7 +407,7 @@ do -Woverloaded-virtual) ;; #strip -Xcompiler because we add it - -Xcompiler) + -Xcompiler|--compiler-options) if [[ $2 != "-o" ]]; then if [ $first_xcompiler_arg -eq 1 ]; then xcompiler_args="$2" diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 23b473ce24..611c089b2e 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,10 +1,6 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS - #SubPackageName Directory Class Req/Opt - # - # New Kokkos subpackages: - Core core PS REQUIRED - Containers containers PS OPTIONAL - Algorithms algorithms PS OPTIONAL - Simd simd PT OPTIONAL + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib + TEST_OPTIONAL_TPLS CUSPARSE ) + +TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/cmake/KokkosConfigCommon.cmake.in b/cmake/KokkosConfigCommon.cmake.in index bb5ce5ff81..446d12fa5f 100644 --- a/cmake/KokkosConfigCommon.cmake.in +++ b/cmake/KokkosConfigCommon.cmake.in @@ -6,10 +6,37 @@ SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) -# These are needed by KokkosKernels +# Required to be a TriBITS-compliant external package +IF(NOT TARGET Kokkos::all_libs) + # CMake Error at /lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY): + # ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target + # "Kokkos::kokkos" is imported but not globally visible. + IF(CMAKE_VERSION VERSION_LESS "3.18") + SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) + ENDIF() + ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos) +ENDIF() + +# Export Kokkos_ENABLE_ for each backend that was enabled. +# NOTE: "Devices" is a little bit of a misnomer here. These are really +# backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP, +# or Kokkos_ENABLE_SYCL. FOREACH(DEV ${Kokkos_DEVICES}) SET(Kokkos_ENABLE_${DEV} ON) ENDFOREACH() +# Export relevant Kokkos_ENABLE