diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 5a259e3a58..f7a0a7185a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,21 +1,22 @@ --- name: Bug report -about: Create a report to correct failures and improve our code +about: Create a report (for github issue tracker) to correct failures title: '' labels: '' assignees: '' --- **Describe the bug** -Please provide a concise, clear description of the bug, as well as any available error logs. -**Please also include the following items to support reproducing the bug** -1. compilers (with versions) +Please provide a concise, clear description of the bug, as well as any available error logs. Feel free to contact the Kokkos Slack `# build` channel for further discussion of your issue. + +**Please include the following for a minimal reproducer** + +1. Compilers (with versions) 2. Kokkos release or commit used (i.e., the sha1 number) -3. platform and backend -4. cmake configure command -5. output from cmake command -6. code needed to reproduce the bug -7. command line needed to reproduce the bug -7. please also attach the `KokkosCore_config.h` header file (generated during the build); -**Any additional info** -Please provide any additional context about the issue here. +3. Platform, architecture and backend +4. CMake configure command +5. Output from CMake configure command +6. Minimum, complete code needed to reproduce the bug +7. Command line needed to reproduce the bug +8. `KokkosCore_config.h` header file (generated during the build) +9. Please provide any additional relevant error logs diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..7242bd2851 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: / + schedule: + interval: "weekly" diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml new file mode 100644 index 0000000000..f4dbe56011 --- /dev/null +++ b/.github/workflows/clang-format-check.yml @@ -0,0 +1,15 @@ +name: clang-format check + +on: [push, pull_request] + +permissions: read-all + +jobs: + formatting-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run clang-format style check. + uses: DoozyX/clang-format-lint-action@v0.17 + with: + clangFormatVersion: 8 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..2ed86a1475 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,51 @@ +name: "CodeQL" + +on: + push: + branches: [ "master", "develop", "release-*" ] + pull_request: + branches: [ "develop" ] + +permissions: read-all + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + timeout-minutes: 360 + permissions: + # required for all workflows + security-events: write + + # only required for workflows in private repositories + actions: read + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: c-cpp + + - name: configure + run: + cmake -B build . + -DKokkos_ENABLE_OPENMP=ON + -DCMAKE_CXX_STANDARD=17 + -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF + -DKokkos_ENABLE_TESTS=ON + -DKokkos_ENABLE_EXAMPLES=ON + -DKokkos_ENABLE_BENCHMARKS=ON + -DCMAKE_BUILD_TYPE=Debug + - name: build + run: + cmake --build build --parallel 2 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:c-cpp" diff --git a/.github/workflows/continuous-integration-workflow-32bit.yml b/.github/workflows/continuous-integration-workflow-32bit.yml index 7fab3b0e62..0260cb5894 100644 --- a/.github/workflows/continuous-integration-workflow-32bit.yml +++ b/.github/workflows/continuous-integration-workflow-32bit.yml @@ -1,5 +1,15 @@ name: github-Linux-32bit -on: [push, pull_request] + +on: + push: + branches: + - develop + pull_request: + paths-ignore: + - '**/*.md' + types: [ opened, reopened, synchronize ] + +permissions: read-all concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} @@ -13,7 +23,7 @@ jobs: image: ghcr.io/kokkos/ci-containers/ubuntu:latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: install_multilib run: sudo apt-get update && sudo apt-get install -y gcc-multilib g++-multilib gfortran-multilib - name: Configure Kokkos @@ -26,7 +36,7 @@ jobs: -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ - -DCMAKE_CXX_FLAGS="-Werror -m32 -DKOKKOS_IMPL_32BIT" \ + -DCMAKE_CXX_FLAGS="-Werror -m32" \ -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_BUILD_TYPE=RelWithDebInfo - name: Build diff --git a/.github/workflows/continuous-integration-workflow-hpx.yml b/.github/workflows/continuous-integration-workflow-hpx.yml index 0c7abd2fc1..35704a28cf 100644 --- a/.github/workflows/continuous-integration-workflow-hpx.yml +++ b/.github/workflows/continuous-integration-workflow-hpx.yml @@ -1,11 +1,20 @@ name: github-Linux-hpx -on: [push, pull_request] +on: + push: + branches: + - develop + pull_request: + paths-ignore: + - '**/*.md' + types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} +permissions: read-all + jobs: hpx: name: hpx @@ -13,7 +22,7 @@ jobs: steps: - name: checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: path: kokkos - name: setup hpx dependencies @@ -26,12 +35,12 @@ jobs: libboost-all-dev \ ninja-build - name: checkout hpx - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: STELLAR-GROUP/hpx - ref: 1.8.0 + ref: v1.9.0 path: hpx - - uses: actions/cache@v3 + - uses: actions/cache@v4 id: cache-hpx with: path: ./hpx/install @@ -73,7 +82,7 @@ jobs: -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_HPX=ON \ - -DKokkos_ENABLE_SERIAL=OFF \ + -DKokkos_ENABLE_SERIAL=ON \ -DKokkos_ENABLE_TESTS=ON \ .. diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index 741f6b2746..9c7b7585d0 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -1,60 +1,87 @@ name: github-Linux -on: [push, pull_request] + +on: + push: + branches: + - develop + pull_request: + paths-ignore: + - '**/*.md' + types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} +permissions: read-all + jobs: CI: continue-on-error: true strategy: matrix: - distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] + distro: ['fedora:latest', 'ubuntu:latest'] cxx: ['g++', 'clang++'] cxx_extra_flags: [''] cmake_build_type: ['Release', 'Debug'] backend: ['OPENMP'] clang-tidy: [''] + stdcxx: [17] include: - - distro: 'fedora:intel' + - distro: 'ubuntu:intel' cxx: 'icpc' cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Release' backend: 'OPENMP' - - distro: 'fedora:intel' + stdcxx: '17' + - distro: 'ubuntu:intel' cxx: 'icpc' cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Debug' backend: 'OPENMP' - - distro: 'fedora:intel' + stdcxx: '17' + - distro: 'ubuntu:intel' cxx: 'icpx' - cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' + cxx_extra_flags: '-fp-model=precise -Wno-pass-failed -fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' + extra_linker_flags: '-fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' cmake_build_type: 'Release' backend: 'OPENMP' - - distro: 'fedora:intel' + stdcxx: '17' + - distro: 'ubuntu:intel' cxx: 'icpx' cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Debug' backend: 'OPENMP' + stdcxx: '20' - distro: 'ubuntu:latest' cxx: 'clang++' + cxx_extra_flags: '-fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' + extra_linker_flags: '-fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' cmake_build_type: 'RelWithDebInfo' backend: 'THREADS' clang-tidy: '-DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*"' + stdcxx: '23' + - distro: 'ubuntu:latest' + cxx: 'clang++' + cxx_extra_flags: '-fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' + extra_linker_flags: '-fsanitize=address,undefined -fno-sanitize=function -fno-sanitize-recover=all' + cmake_build_type: 'RelWithDebInfo' + backend: 'SERIAL' + stdcxx: '20' - distro: 'ubuntu:latest' cxx: 'g++' cmake_build_type: 'RelWithDebInfo' backend: 'THREADS' + stdcxx: '23' runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} steps: - name: Checkout desul - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: desul/desul - ref: 477da9c8f40f8db369c28dd3f93a67e376d8511b + ref: 779d0441a778c7088a36d38c4cbf8df3cfa182cc path: desul - name: Install desul working-directory: desul @@ -66,15 +93,12 @@ jobs: cmake -DDESUL_ENABLE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr/desul-install .. sudo cmake --build . --target install --parallel 2 - name: Checkout code - uses: actions/checkout@v3 - - uses: actions/cache@v3 + uses: actions/checkout@v4 + - uses: actions/cache@v4 with: path: ~/.cache/ccache key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }} restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }} - - name: maybe_disable_death_tests - if: ${{ matrix.distro == 'fedora:rawhide' }} - run: echo "GTEST_FILTER=-*DeathTest*" >> $GITHUB_ENV - name: maybe_use_flang_new if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} run: echo "FC=flang-new" >> $GITHUB_ENV @@ -100,6 +124,8 @@ jobs: -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DCMAKE_CXX_FLAGS="-Werror ${{ matrix.cxx_extra_flags }}" \ + -DCMAKE_CXX_STANDARD="${{ matrix.stdcxx }}" \ + -DCMAKE_EXE_LINKER_FLAGS="${{ matrix.extra_linker_flags }}" \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} @@ -112,6 +138,7 @@ jobs: working-directory: builddir run: ctest --output-on-failure - name: Test linking against build dir + if: ${{ !contains(matrix.cxx_extra_flags, '-fsanitize=address') }} working-directory: example/build_cmake_installed run: | cmake -B builddir_buildtree -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} -DKokkos_ROOT=../../builddir @@ -122,6 +149,7 @@ jobs: - name: Install run: sudo cmake --build builddir --target install - name: Test install + if: ${{ !contains(matrix.cxx_extra_flags, '-fsanitize=address') }} working-directory: example/build_cmake_installed run: | cmake -B builddir -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 0ff3266848..2bcf41a0d3 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -1,11 +1,20 @@ name: github-OSX -on: [push, pull_request] +on: + push: + branches: + - develop + pull_request: + paths-ignore: + - '**/*.md' + types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} +permissions: read-all + jobs: osxci: name: osx-ci @@ -24,7 +33,7 @@ jobs: cmake_build_type: "Release" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: configure run: cmake -B build . diff --git a/.github/workflows/performance-benchmark.yml b/.github/workflows/performance-benchmark.yml index 205239e043..bfbbeea4a8 100644 --- a/.github/workflows/performance-benchmark.yml +++ b/.github/workflows/performance-benchmark.yml @@ -4,6 +4,11 @@ on: branches: - develop pull_request: + paths-ignore: + - '**/*.md' + types: [ opened, reopened, synchronize ] + +permissions: read-all jobs: CI: @@ -20,8 +25,8 @@ jobs: BUILD_ID: ${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }} steps: - name: Checkout code - uses: actions/checkout@v3 - - uses: actions/cache@v3 + uses: actions/checkout@v4 + - uses: actions/cache@v4 with: path: ~/.cache/ccache key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }}-${{ github.sha }} diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml new file mode 100644 index 0000000000..f0bb9b0b19 --- /dev/null +++ b/.github/workflows/releases.yml @@ -0,0 +1,83 @@ +on: + push: + tags: '[0-9]+.[0-9]+.[0-9][0-9]' + + +permissions: read-all + +jobs: + # This step builds our artifacts, uploads them to the workflow run, and + # outputs their digest. + build: + outputs: + hashes: ${{ steps.hash.outputs.hashes }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build artifacts + run: | + git archive -o kokkos-${{ github.ref_name }}.zip HEAD + git archive -o kokkos-${{ github.ref_name }}.tar.gz HEAD + + - name: Generate hashes + shell: bash + id: hash + run: | + # sha256sum generates sha256 hash for all artifacts. + # base64 -w0 encodes to base64 and outputs on a single line. + echo "hashes=$(sha256sum kokkos-${{ github.ref_name }}.zip kokkos-${{ github.ref_name }}.tar.gz | base64 -w0)" >> "$GITHUB_OUTPUT" + + - name: Upload source code (zip) + uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + with: + name: kokkos-${{ github.ref_name }}.zip + path: kokkos-${{ github.ref_name }}.zip + if-no-files-found: error + retention-days: 5 + + - name: Upload source code (tar.gz) + uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + with: + name: kokkos-${{ github.ref_name }}.tar.gz + path: kokkos-${{ github.ref_name }}.tar.gz + if-no-files-found: error + retention-days: 5 + + # This step calls the generic workflow to generate provenance. + provenance: + needs: [build] + permissions: + actions: read + id-token: write + contents: write + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 + with: + base64-subjects: "${{ needs.build.outputs.hashes }}" + # Upload provenance to a new release + upload-assets: true + provenance-name: "kokkos-${{ github.ref_name }}.intoto.jsonl" + + # This step uploads our artifacts to the tagged GitHub release. + release: + needs: [build, provenance] + permissions: + contents: write + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + steps: + - name: Download kokkos-${{ github.ref_name }}.zip + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: kokkos-${{ github.ref_name }}.zip + + - name: Download kokkos-${{ github.ref_name }}.tar.gz + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: kokkos-${{ github.ref_name }}.tar.gz + + - name: Upload assets + uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191 # v2.0.8 + with: + files: | + kokkos-${{ github.ref_name }}.zip + kokkos-${{ github.ref_name }}.tar.gz diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000000..77580978b2 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,73 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + # Weekly on Saturdays. + - cron: '30 1 * * 6' + push: + branches: [ master, develop ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. + - name: "Upload SARIF results to code scanning" + uses: github/codeql-action/upload-sarif@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 + with: + sarif_file: results.sarif diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml new file mode 100644 index 0000000000..d801c740a7 --- /dev/null +++ b/.github/workflows/windows.yml @@ -0,0 +1,35 @@ +name: github-windows + +on: + push: + pull_request: + +concurrency: + group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +permissions: read-all + +jobs: + windows-cuda: + # Cuda build on Windows + name: Windows Cuda + runs-on: windows-2022 + + steps: + - uses: Jimver/cuda-toolkit@v0.2.16 + id: cuda-toolkit + with: + cuda: '12.4.1' + - uses: actions/checkout@v4 + - name: configure + shell: bash + run: | + mkdir build + mkdir c:/project + cd build + cmake -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE=ON .. + - name: build library + shell: bash + run: | + cmake --build build --parallel 2 --config Release diff --git a/.jenkins b/.jenkins index c7d8ce533d..0393ff06fb 100644 --- a/.jenkins +++ b/.jenkins @@ -3,21 +3,26 @@ pipeline { environment { CCACHE_DIR = '/tmp/ccache' - CCACHE_MAXSIZE = '10G' + CCACHE_MAXSIZE = '5G' CCACHE_CPP2 = 'true' } options { + disableConcurrentBuilds(abortPrevious: true) timeout(time: 6, unit: 'HOURS') } + triggers { + issueCommentTrigger('.*test this please.*') + } + stages { stage('Clang-Format') { agent { dockerfile { filename 'Dockerfile.clang' dir 'scripts/docker' - label 'nvidia-docker || rocm-docker || docker' + label 'nvidia-docker || docker' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } @@ -27,7 +32,7 @@ pipeline { } stage('Build') { parallel { - stage('OPENACC-NVHPC-CUDA-11.6') { + stage('OPENACC-NVHPC-CUDA-12.2') { agent { dockerfile { filename 'Dockerfile.nvhpc' @@ -37,7 +42,7 @@ pipeline { } } environment { - CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6' + NVHPC_CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/12.2' } steps { sh '''rm -rf build && mkdir -p build && cd build && \ @@ -53,13 +58,12 @@ pipeline { make -j8 && ctest --verbose''' } } - stage('CUDA-11.7-NVHPC') { + stage('CUDA-12.2-NVHPC-AS-HOST-COMPILER') { agent { dockerfile { filename 'Dockerfile.nvhpc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvcr.io/nvidia/nvhpc:22.9-devel-cuda11.7-ubuntu20.04' - label 'nvidia-docker && large_images' + label 'nvidia-docker && large_images && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -70,7 +74,7 @@ pipeline { OMP_MAX_ACTIVE_LEVELS = 1 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' - NVHPC_CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/22.9/cuda/11.7' + NVHPC_CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/12.2' } steps { sh '''rm -rf build && mkdir -p build && cd build && \ @@ -78,7 +82,7 @@ pipeline { -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=nvc++ \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS="--diag_suppress=implicit_return_from_non_void_function,no_device_stack" \ + -DCMAKE_CXX_FLAGS="--diag_suppress=implicit_return_from_non_void_function" \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ @@ -95,22 +99,22 @@ pipeline { dockerfile { filename 'Dockerfile.sycl' dir 'scripts/docker' - label 'nvidia-docker && volta' + label 'nvidia-docker && ampere' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } steps { sh 'ccache --zero-stats' - sh '''. /opt/intel/oneapi/setvars.sh --include-intel-llvm && \ - rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER=/opt/intel/oneapi/compiler/2023.0.0/linux/bin-llvm/clang++ \ + -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ + -DCMAKE_PREFIX_PATH="$ONE_DPL_DIR" \ -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ - -DKokkos_ARCH_VOLTA70=ON \ + -DKokkos_ARCH_AMPERE80=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ @@ -134,8 +138,8 @@ pipeline { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2' - label 'rocm-docker && vega' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2-complete' + label 'rocm-docker ' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } @@ -161,6 +165,7 @@ pipeline { -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_HIP=ON \ -DKokkos_ENABLE_OPENMP=ON \ + -DKokkos_ENABLE_IMPL_MDSPAN=OFF \ -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON \ .. && \ make -j8 && ctest --verbose''' @@ -171,13 +176,13 @@ pipeline { } } } - stage('HIP-ROCm-5.2-C++20') { + stage('HIP-ROCm-5.6-C++20') { agent { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2' - label 'rocm-docker && vega' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-22.04:5.6-complete' + label 'rocm-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } @@ -185,6 +190,7 @@ pipeline { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ @@ -239,7 +245,7 @@ pipeline { -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_OPENMPTARGET=ON \ -DKokkos_ENABLE_OPENMP=ON \ - -DKokkos_ARCH_VEGA906=ON \ + -DKokkos_ARCH_AMD_GFX906=ON \ && \ cmake --build build --parallel ${BUILD_JOBS} && \ cd build && ctest --output-on-failure @@ -287,7 +293,7 @@ pipeline { } } } - stage('CUDA-10.1-Clang-Tidy') { + stage('CUDA-11.0.3-Clang-Tidy') { agent { dockerfile { filename 'Dockerfile.kokkosllvmproject' @@ -304,7 +310,7 @@ pipeline { -DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*" \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS=-Werror \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-unknown-cuda-version" \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ @@ -330,7 +336,7 @@ pipeline { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.7.0-devel-ubuntu20.04' + additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.7.1-devel-ubuntu20.04' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } @@ -340,7 +346,7 @@ pipeline { sh '''rm -rf build && mkdir -p build && cd build && \ ../gnu_generate_makefile.bash \ --with-options=compiler_warnings \ - --cxxflags="-Werror" \ + --cxxflags="-Werror -Werror all-warnings -Xcudafe --diag_suppress=20208" \ --cxxstandard=c++17 \ --with-cuda \ --with-cuda-options=enable_lambda \ @@ -360,7 +366,7 @@ pipeline { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0.3-devel-ubuntu18.04 --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran clang" --build-arg CMAKE_VERSION=3.17.3' - label 'nvidia-docker' + label 'nvidia-docker && (volta || ampere)' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -389,7 +395,6 @@ pipeline { -DKokkos_ENABLE_CUDA_LAMBDA=OFF \ -DKokkos_ENABLE_CUDA_UVM=ON \ -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \ - -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DCMAKE_INSTALL_PREFIX=${PWD}/../install \ @@ -431,8 +436,8 @@ pipeline { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.6.0-devel-ubuntu20.04' - label 'nvidia-docker' + additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.6.2-devel-ubuntu20.04' + label 'nvidia-docker && (volta || ampere)' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -440,10 +445,11 @@ pipeline { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ - -DCMAKE_CXX_FLAGS=-Werror \ + -DCMAKE_CXX_FLAGS="-Werror -Werror all-warnings -Xcudafe --diag_suppress=20208" \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ @@ -455,6 +461,8 @@ pipeline { -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_LIBDL=OFF \ + -DKokkos_ENABLE_IMPL_MDSPAN=OFF \ + -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF \ .. && \ make -j8 && ctest --verbose && \ cd ../example/build_cmake_in_tree && \ @@ -489,9 +497,8 @@ pipeline { -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ - -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ - -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_OPENMP=ON \ diff --git a/.jenkins_nightly b/.jenkins_nightly index 8bcdb75a2a..8dd02e9f02 100644 --- a/.jenkins_nightly +++ b/.jenkins_nightly @@ -70,6 +70,74 @@ pipeline { ''' } } + stage('GCC-14') { + agent { + docker { + image 'gcc:14.1' + label 'docker' + } + } + steps { + sh ''' + wget https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0-linux-x86_64.sh && \ + chmod +x cmake-3.30.0-linux-x86_64.sh && ./cmake-3.30.0-linux-x86_64.sh --skip-license --prefix=/usr + + mkdir -p build && cd build && \ + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_STANDARD=26 \ + -DCMAKE_CXX_FLAGS=-Werror \ + -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DKokkos_ENABLE_EXAMPLES=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ + -DKokkos_ENABLE_SERIAL=ON \ + .. && \ + make -j8 && ctest --verbose + ''' + } + } + stage('HIP-ROCM-6.1') { + agent { + dockerfile { + filename 'Dockerfile.hipcc' + dir 'scripts/docker' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-22.04:6.1.2-complete' + label 'rocm-docker && AMD_Radeon_Instinct_MI210' + args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' + } + } + environment { + // FIXME Test returns a wrong value + GTEST_FILTER = '-hip_hostpinned.view_allocation_large_rank' + } + steps { + sh 'ccache --zero-stats' + sh '''rm -rf build && mkdir -p build && cd build && \ + cmake \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_CXX_COMPILER=hipcc \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ + -DCMAKE_CXX_STANDARD=20 \ + -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DKokkos_ENABLE_HIP=ON \ + .. && \ + make -j8 && ctest --verbose''' + } + post { + always { + sh 'ccache --show-stats' + } + } + } } } } diff --git a/.olcf-gitlab-ci.yml b/.olcf-gitlab-ci.yml new file mode 100644 index 0000000000..4e737cc536 --- /dev/null +++ b/.olcf-gitlab-ci.yml @@ -0,0 +1,12 @@ +test: + stage: test + tags: [frontier, shell] + id_tokens: + OLCF_ID_TOKEN: + aud: https://code.olcf.ornl.gov + script: + - module load rocm/6.0 + - cmake -B build -DCMAKE_CXX_COMPILER=hipcc -DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_TESTS=ON + - cmake --build build -j48 + - cd build + - ctest -E Kokkos_CoreUnitTest_DeviceAndThreads -V diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c145c44b3..78225f9e6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,307 @@ # CHANGELOG -## [4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00) + +### Features: +* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069) + +### Backend and Architecture Enhancements: + +#### CUDA: +* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936) +* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891) +* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013) + +#### HIP: +* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000) +* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127) + +#### SYCL: +* Add support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912) +* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887) +* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750) +* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988) +* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004) + +#### OpenACC: +* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012) + +#### OpenMPTarget: +* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925) +* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998) + +#### HPX: +* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015) + +### General Enhancements +* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730) +* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929) +* Add a check precondition non-overlapping ranges for the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922) +* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030) +* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802) +* Updates for `Kokkos::Array`: add `kokkos_swap(Array)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148) +* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040) + +### Build System Changes +* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965) +* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895) +* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972) +* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123) +* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158) +* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896) +* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987) +* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062) + +### Incompatibilities (i.e. breaking changes) +* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906) +* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145) + +### Deprecations +* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907) +* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076) +* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947) +* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812) +* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934) +* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151) +* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156) + +### Bug Fixes +* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910) +* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993) +* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014) +* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059) +* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065) +* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289) +* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098) +* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151) + +## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) + +### Backend and Architecture Enhancements: + +#### HIP: +* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877) + +### Bug Fixes +* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) +* `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931) +* Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578) + +## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) + +### Features: +* Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801) + +### Backend and Architecture Enhancements: + +#### CUDA: +* Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782) +* Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701) +* Don't use the compiler launcher script if the CMake compile language is CUDA. [\#6704](https://github.com/kokkos/kokkos/pull/6704) +* nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615) + +#### HIP: + * Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857) + * Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793) + +#### SYCL: +* We only support OneAPI SYCL implementation: add check during initialization + * Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784) + * Filter GPU devices for `ext_onapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6784) +* Performance Improvements + * Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739) + * Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500) +* Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6739) +* Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870) + +#### OpenMPTarget: +* Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380) +* Guard scratch memory usage in ParallelReduce [\#6585 ](https://github.com/kokkos/kokkos/pull/6585) +* Update linker flags for Intel GPUs update [\#6735](https://github.com/kokkos/kokkos/pull/6735) +* Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652) + +#### OpenACC: +* Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446) +* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772) + +#### Threads: +* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601) + +#### OpenMP: +* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573) + +### General Enhancements + +* Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556) +* Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598) +* Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373) +* Provide new public headers `` and `` [\#6687](https://github.com/kokkos/kokkos/pull/6687) +* Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessaryinitialization + avoid evaluating twice the predicate during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747) +* Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713) +* simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243) +* Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524) +* Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813) +* Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855) +* CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850) +* CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516) + +### Build System Changes +* Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692) +* Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773) +* Add C++26 standard to CMake setup [\#6733](https://github.com/kokkos/kokkos/pull/6733) +* Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606) +* Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898) + +### Incompatibilities (i.e. breaking changes) +* Remove all `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523) +* Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665) +* `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690) +* Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726) +* Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754) +* Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579) +* Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593) +* Check matching static extents in View constructor [\#5190 ](https://github.com/kokkos/kokkos/pull/5190) +* Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642) +* Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845) +* Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861) +* Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797) +* Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557) +* Remove `Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791) +* Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798) +* Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806) +* Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744) + +### Deprecations +* Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697) +* Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710) +* Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582) + +### Bug Fixes +* Fix team-level MDRange reductions: [\#6511](https://github.com/kokkos/kokkos/pull/6511) +* Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334) +* Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667) +* `fill_random` overload that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658) +* HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777) +* Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786) +* Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821) +* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892) + +## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01) + +### Backend and Architecture Enhancements: + +#### CUDA: +- Add warp sync for `parallel_reduce` to avoid race condition [\#6630](https://github.com/kokkos/kokkos/pull/6630), [\#6746](https://github.com/kokkos/kokkos/pull/6746) + +#### HIP: +- Fix Graph "multiple definition of" linking error (missing `inline` specifier) [\#6624](https://github.com/kokkos/kokkos/pull/6624) +- Add support for gfx940 (AMD Instinct MI300 GPU) [\#6671](https://github.com/kokkos/kokkos/pull/6671) + +### Build System +- CMake: Don't let Kokkos set `CMAKE_CXX_FLAGS` for Trilinos builds [\#6742](https://github.com/kokkos/kokkos/pull/6742) + +### Bug Fixes +- Remove deprecation warning for `AllocationMechanism` for GCC <11.0 [\#6653](https://github.com/kokkos/kokkos/pull/6653) +- Fix bug early tools finalize with non-default host execution instances [\#6635](https://github.com/kokkos/kokkos/pull/6635) +- Fix various issues for MSVC CUDA builds [\#6659](https://github.com/kokkos/kokkos/pull/6659) +- Fix "extra `;`" warning with `-pedantic` flag in `` [\#6510](https://github.com/kokkos/kokkos/pull/6510) + +## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00) + +### Features: +- SIMD: significant improvements to SIMD support and alignment with C++26 SIMD + - add `Kokkos::abs` overload for SIMD types [\#6069](https://github.com/kokkos/kokkos/pull/6069) + - add generator constructors [\#6347](https://github.com/kokkos/kokkos/pull/6347) + - convert binary operators to hidden friends [\#6320](https://github.com/kokkos/kokkos/pull/6320) + - add shift operators [\#6109](https://github.com/kokkos/kokkos/pull/6109) + - add `float` support [\#6177](https://github.com/kokkos/kokkos/pull/6177) + - add remaining `gather_from` and `scatter_to` overloads [\#6220](https://github.com/kokkos/kokkos/pull/6220) + - define simd math function overloads in the Kokkos namespace [\#6465](https://github.com/kokkos/kokkos/pull/6465), [\#6487](https://github.com/kokkos/kokkos/pull/6487) + - `Kokkos_ENABLE_NATIVE=ON` autodetects SIMD types supported [\#6188](https://github.com/kokkos/kokkos/pull/6188) + - fix AVX2 SIMD support for ZEN2 AMD CPU [\#6238](https://github.com/kokkos/kokkos/pull/6238) +- `Kokkos::printf` [\#6083](https://github.com/kokkos/kokkos/pull/6083) +- `Kokkos::sort`: support custom comparator [\#6253](https://github.com/kokkos/kokkos/pull/6253) +- `half_t` and `bhalf_t` numeric traits [\#5778](https://github.com/kokkos/kokkos/pull/5778) +- `half_t` and `bhalf_t` mixed comparisons [\#6407](https://github.com/kokkos/kokkos/pull/6407) +- `half_t` and `bhalf_t` mathematical functions [\#6124](https://github.com/kokkos/kokkos/pull/6124) +- `TeamThreadRange` `parallel_scan` with return value [\#6090](https://github.com/kokkos/kokkos/pull/6090), [\#6301](https://github.com/kokkos/kokkos/pull/6301), [\#6302](https://github.com/kokkos/kokkos/pull/6302), [\#6303](https://github.com/kokkos/kokkos/pull/6303), [\#6307](https://github.com/kokkos/kokkos/pull/6307) +- `ThreadVectorRange` `parallel_scan` with return value [\#6235](https://github.com/kokkos/kokkos/pull/6235), [\#6242](https://github.com/kokkos/kokkos/pull/6242), [\#6308](https://github.com/kokkos/kokkos/pull/6308), [\#6305](https://github.com/kokkos/kokkos/pull/6305), [\#6292](https://github.com/kokkos/kokkos/pull/6292) +- Add team-level std algorithms [\#6200](https://github.com/kokkos/kokkos/pull/6200), [\#6205](https://github.com/kokkos/kokkos/pull/6205), [\#6207](https://github.com/kokkos/kokkos/pull/6207), [\#6208](https://github.com/kokkos/kokkos/pull/6208), [\#6209](https://github.com/kokkos/kokkos/pull/6209), [\#6210](https://github.com/kokkos/kokkos/pull/6210), [\#6211](https://github.com/kokkos/kokkos/pull/6211), [\#6212](https://github.com/kokkos/kokkos/pull/6212), [\#6213](https://github.com/kokkos/kokkos/pull/6213), [\#6256](https://github.com/kokkos/kokkos/pull/6256), [\#6258](https://github.com/kokkos/kokkos/pull/6258), [\#6350](https://github.com/kokkos/kokkos/pull/6350), [\#6351](https://github.com/kokkos/kokkos/pull/6351) +- Serial: Allow for distinct execution space instances [\#6441](https://github.com/kokkos/kokkos/pull/6441) + +### Backend and Architecture Enhancements: + +#### CUDA: +- Fixed potential data race in Cuda `parallel_reduce` [\#6236](https://github.com/kokkos/kokkos/pull/6236) +- Use `cudaMallocAsync` by default [\#6402](https://github.com/kokkos/kokkos/pull/6402) +- Bugfix for using Kokkos from a thread of execution [\#6299](https://github.com/kokkos/kokkos/pull/6299) + +#### HIP: +- New naming convention for AMD GPU: VEGA906, VEGA908, VEGA90A, NAVI1030 to AMD_GFX906, AMD_GFX908, AMD_GFX90A, AMD_GFX1030 [\#6266](https://github.com/kokkos/kokkos/pull/6266) +- Add initial support for gfx942: [\#6358](https://github.com/kokkos/kokkos/pull/6358) +- Improve reduction performance [\#6229](https://github.com/kokkos/kokkos/pull/6229) +- Deprecate `HIP(hipStream_t,bool)` constructor [\#6401](https://github.com/kokkos/kokkos/pull/6401) +- Add support for Graph [\#6370](https://github.com/kokkos/kokkos/pull/6370) +- Improve reduction performance when using Teams [\#6284](https://github.com/kokkos/kokkos/pull/6284) +- Fix concurrency calculation [\#6479](https://github.com/kokkos/kokkos/pull/6479) +- Fix potential data race in HIP `parallel_reduce` [\#6429](https://github.com/kokkos/kokkos/pull/6429) + +#### SYCL: +- Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246) +- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) +- Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321) +- Allow sorting via native oneDPL to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322) +- Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189) + +#### OpenACC: +- Support Clacc compiler [\#6250](https://github.com/kokkos/kokkos/pull/6250) + +### General Enhancements +- Add missing `is_*_view` traits and `is_*_view_v` helper variable templates for `DynRankView`, `DynamicView`, `OffsetView`, `ScatterView` containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +- Make `nvcc_wrapper` and `compiler_launcher` scripts more portable by switching to a `#!/usr/bin/env` shebang [\#6357](https://github.com/kokkos/kokkos/pull/6357) +- Add an improved `Kokkos::malloc` / `Kokkos::free` performance test [\#6377](https://github.com/kokkos/kokkos/pull/6377) +- Ensure `Views` with `size==0` can be used with `deep_copy` [\#6273](https://github.com/kokkos/kokkos/pull/6273) +- `Kokkos::abort` is moved to header `Kokkos_Abort.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- `KOKKOS_ASSERT`, `KOKKOS_EXPECTS`, `KOKKOS_ENSURES` are moved to header `Kokkos_Assert.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- Add a permuted-index mode to the gups benchmark [\#6378](https://github.com/kokkos/kokkos/pull/6378) +- Check for overflow during backend initialization [\#6159](https://github.com/kokkos/kokkos/pull/6159) +- Make constraints on `Kokkos::sort` more visible [\#6234](https://github.com/kokkos/kokkos/pull/6234) and cleanup API [\#6239](https://github.com/kokkos/kokkos/pull/6239) +- Add converting assignment to `DualView`: [\#6474](https://github.com/kokkos/kokkos/pull/6474) + + +### Build System Changes + +- Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282) +- Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342) + +### Incompatibilities (i.e. breaking changes) + - Ensure that `Kokkos::complex` only gets instantiated for cv-unqualified floating-point types [\#6251](https://github.com/kokkos/kokkos/pull/6251) + - Removed (deprecated-3) support for volatile join operators in reductions [\#6385](https://github.com/kokkos/kokkos/pull/6385) + - Enforce `ViewCtorArgs` restrictions for `create_mirror_view` [\#6304](https://github.com/kokkos/kokkos/pull/6304) + - SIMD types for ARM NEON are not autodetected anymore but need `Kokkos_ARCH_ARM_NEON` or `Kokkos_ARCH_NATIVE=ON` [\#6394](https://github.com/kokkos/kokkos/pull/6394) + - Remove `#include ` from headers where possible [\#6482](https://github.com/kokkos/kokkos/pull/6482) + +### Deprecations +- Deprecated `Kokkos::vector` [\#6252](https://github.com/kokkos/kokkos/pull/6252) +- All host allocation mechanisms except for `STD_MALLOC` have been deprecated [\#6341](https://github.com/kokkos/kokkos/pull/6341) + +### Bug Fixes + - Missing memory fence in `RandomPool::free_state` functions [\#6290](https://github.com/kokkos/kokkos/pull/6290) + - Fix for corner case in `Kokkos::Experimental::is_partitioned` algorithm [\#6257](https://github.com/kokkos/kokkos/pull/6257) + - Fix initialization of scratch lock variables in the `Cuda` backend [\#6433](https://github.com/kokkos/kokkos/pull/6433) + - Fixes for `Kokkos::Array` [\#6372](https://github.com/kokkos/kokkos/pull/6372) + - Fixed symlink configure issue for Windows [\#6241](https://github.com/kokkos/kokkos/pull/6241) + - OpenMPTarget init-join fix [\#6444](https://github.com/kokkos/kokkos/pull/6444) + - Fix atomic operations bug for Min and Max [\#6435](https://github.com/kokkos/kokkos/pull/6435) + - Fix implementation for `cyl_bessel_i0` [\#6484](https://github.com/kokkos/kokkos/pull/6484) + - Fix various NVCC warnings in `BinSort`, `Array`, and bit manipulation function templates [\#6483](https://github.com/kokkos/kokkos/pull/6483) + +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.1.00) (2023-06-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) ### Features: @@ -887,95 +1188,95 @@ - Major update for OpenMPTarget: many capabilities now work. For details contact us. - Added DPC++/SYCL backend: primary capabilites are working. - Added Kokkos Graph API analogous to CUDA Graphs. -- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/#3536) -- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/#3546) -- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/#3439) -- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/#3379) +- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536) +- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546) +- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439) +- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379) **Implemented enhancements Backends and Archs:** -- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/#3614) -- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/#3375) -- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/#3583) -- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/#3577) -- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/#3544) -- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/#3550) -- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/#3480) -- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/#3474) -- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/#3451) -- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/#3447) -- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/#3504) -- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/#3411) -- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/#3440) -- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/#3418) -- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/#3366) +- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614) +- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/3375) +- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583) +- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577) +- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544) +- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550) +- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480) +- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474) +- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451) +- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447) +- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504) +- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411) +- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440) +- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418) +- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366) **Implemented enhancements Policies:** -- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/#3494) -- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/#3527) -- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/#3395) -- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/#3362) -- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/#3369) -- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/#3206) -- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/#3509) +- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494) +- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/3527) +- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395) +- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362) +- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369) +- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206) +- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509) **Implemented enhancements BuildSystem:** -- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/#3488) -- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/#3548) -- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/#3136) -- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/#3434) -- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/#3402) -- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/#3457) +- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488) +- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548) +- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136) +- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434) +- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402) +- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457) **Implemented enhancements Tools:** -- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/#3455) -- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/#3530) -- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/#3518) -- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/#3459) -- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/#3326) +- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455) +- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530) +- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518) +- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459) +- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326) **Implemented enhancements Other:** -- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/#3528) -- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/#3449) -- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/#3436) -- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/#3435) -- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/#3422) -- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/#3416) -- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/#3388) -- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/#3359) -- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/#3357) -- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/#3340) -- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/#3339) -- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/#3338) -- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/#3309) -- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/#3265) -- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/#2941) +- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528) +- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449) +- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436) +- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435) +- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422) +- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416) +- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388) +- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359) +- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357) +- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340) +- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339) +- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338) +- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309) +- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265) +- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941) **Fixed bugs:** -- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/#3591) -- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/#3588) -- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/#3566) -- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/#3565) -- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/#3532) -- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/#3529) -- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/#3510) -- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/#3503) -- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/#3467) -- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/#3458) -- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/#3398) -- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/#3393) -- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/#3390) -- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/#3378) -- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/#3348) -- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/#3345) -- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/#3343) -- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/#3260) +- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591) +- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588) +- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566) +- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565) +- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532) +- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/3529) +- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510) +- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503) +- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467) +- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458) +- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398) +- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393) +- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390) +- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378) +- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348) +- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345) +- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343) +- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260) **Incompatibilities:** -- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/#3535) -- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/#3534) -- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/#3301) -- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/#3264) -- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/#3148) +- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535) +- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534) +- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301) +- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264) +- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148) ## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01) diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..28c674c451 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,65 @@ +cff-version: 1.2.0 +title: Kokkos +message: >- + If you use this software, please cite the overview paper +type: software +authors: + - name: The Kokkos authors + website: https://kokkos.org/community/team/ +identifiers: + - type: url + website: https://kokkos.org/kokkos-core-wiki/citation.html +repository-code: 'https://github.com/kokkos/kokkos' +url: 'https://kokkos.org/' +license: Apache-2.0 +preferred-citation: + type: article + authors: + - given-names: Christian R. + family-names: Trott + - given-names: Damien + family-names: Lebrun-Grandié + - given-names: Daniel + family-names: Arndt + - family-names: Ciesko + given-names: Jan + - given-names: Vinh + family-names: Dang + - family-names: Ellingwood + given-names: Nathan + - given-names: Rahulkumar + family-names: Gayatri + - given-names: Evan + family-names: Harvey + - given-names: Daisy S. + family-names: Hollman + - given-names: Dan + family-names: Ibanez + - given-names: Nevin + family-names: Liber + - given-names: Jonathan + family-names: Madsen + - given-names: Jeff + family-names: Miles + - given-names: David + family-names: Poliakoff + - given-names: Amy + family-names: Powell + - given-names: Sivasankaran + family-names: Rajamanickam + - given-names: Mikael + family-names: Simberg + - given-names: Dan + family-names: Sunderland + - given-names: Bruno + family-names: Turcksin + - given-names: Jeremiah + family-names: Wilke + doi: 10.1109/TPDS.2021.3097283 + journal: IEEE Transactions on Parallel and Distributed Systems + start: 805 + end: 817 + title: "Kokkos 3: Programming Model Extensions for the Exascale Era" + volume: 33 + issue: 4 + year: 2022 diff --git a/CMakeLists.txt b/CMakeLists.txt index 895cee6a08..054de2c1da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,8 +150,8 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_MINOR 4) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -252,7 +252,6 @@ ENDIF() # subpackages ## This restores the old behavior of ProjectCompilerPostConfig.cmake -# It sets the CMAKE_CXX_FLAGS globally to those used by Kokkos # We must do this before KOKKOS_PACKAGE_DECL IF (KOKKOS_HAS_TRILINOS) # Overwrite the old flags at the top-level @@ -280,21 +279,13 @@ IF (KOKKOS_HAS_TRILINOS) SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG}) ENDFOREACH() - SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}") IF (KOKKOS_ENABLE_CUDA) STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG}) ENDFOREACH() - SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}") ENDIF() - # Both parent scope and this package - # In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in - # TRILINOS_TOPLEVEL_CXX_FLAGS - SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}" PARENT_SCOPE) - SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}") - #CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here #These flags get set up in KOKKOS_PACKAGE_DECL, which means they #must be configured before KOKKOS_PACKAGE_DECL SET(KOKKOS_ALL_COMPILE_OPTIONS @@ -314,7 +305,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() diff --git a/Copyright.txt b/Copyright.txt index 5e2f8d8647..cbba3efc7b 100644 --- a/Copyright.txt +++ b/Copyright.txt @@ -1,41 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER +************************************************************************ + + Kokkos v. 4.0 + Copyright (2022) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. diff --git a/LICENSE b/LICENSE index 6572cc2db0..4d9d69d7c4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,3 @@ - ************************************************************************ - - Kokkos v. 4.0 - Copyright (2022) National Technology & Engineering - Solutions of Sandia, LLC (NTESS). - - Under the terms of Contract DE-NA0003525 with NTESS, - the U.S. Government retains certain rights in this software. - - ============================================================================== Kokkos is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== diff --git a/Makefile.kokkos b/Makefile.kokkos index 9436b75b9e..15f24f3073 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -1,8 +1,8 @@ # Default settings common options. KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_MINOR = 4 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -11,15 +11,15 @@ KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX -# IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 +# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace +# IBM: Power8,Power9 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 -# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC +# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" -# Options: hwloc,librt,experimental_memkind +# Options: hwloc KOKKOS_USE_TPLS ?= "" # Options: c++17,c++1z,c++20,c++2a,c++23,c++2b KOKKOS_CXX_STANDARD ?= "c++17" @@ -30,7 +30,7 @@ KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr +# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async KOKKOS_CUDA_OPTIONS ?= "" # Options: rdc @@ -46,7 +46,7 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$( uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT) # Return a 1 if a string contains a substring and 0 if not # Note the search string should be without '"' -# Example: $(call kokkos_has_string,"hwloc,librt",hwloc) +# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc) # Will return a 1 kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0) # Returns 1 if the path exists, 0 otherwise @@ -63,11 +63,11 @@ KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD), KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a) KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23) KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b) +KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26) +KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c) # Check for external libraries. KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) -KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt) -KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind) # Check for advanced settings. KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) @@ -82,6 +82,7 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) +KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) @@ -307,7 +308,6 @@ endif # Intel based. KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC) -KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM) KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) @@ -318,12 +318,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) +# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter +# matches the CMake option but we also accept the former for backward-compatibility. KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP)) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen) + endif +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP) +endif +# Traditionally the architecture was called PVC instead of Intel_PVC. This +# version makes us accept IntelPVC and Intel_PVC as well. KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) # NVIDIA based. @@ -384,14 +415,13 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc)) +KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc)) # IBM based. -KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ) -KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7) KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9) -KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) +KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) # AMD based. KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) @@ -402,19 +432,37 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) endif endif -KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) -KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908) -KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A) -KOKKOS_INTERNAL_USE_ARCH_NAVI1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),Navi1030) + +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906) +endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908) +endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) +endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) +endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) +endif # Any AVX? -KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) # Incompatible flags? -KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -559,6 +607,16 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23") endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1) + #I cannot make CMake add this in a good way - so add it here + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26") +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1) + #I cannot make CMake add this in a good way - so add it here + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26") +endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -598,27 +656,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC") endif -ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT") - KOKKOS_LIBS += -lrt - KOKKOS_TPL_LIBRARY_NAMES += rt -endif - -ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - ifneq ($(KOKKOS_CMAKE), yes) - ifneq ($(MEMKIND_PATH),) - KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib - endif - KOKKOS_LIBS += -lmemkind -lnuma - KOKKOS_TPL_LIBRARY_NAMES += memkind numa - endif - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE") -endif - ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS") endif @@ -685,8 +722,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") + ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") + else + tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") endif endif @@ -700,6 +739,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -712,6 +752,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -724,6 +765,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") KOKKOS_CXXFLAGS += -march=armv8.2-a+sve KOKKOS_LDFLAGS += -march=armv8.2-a+sve @@ -737,9 +779,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") + + KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 + KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -752,7 +802,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -765,7 +815,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -779,6 +829,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -792,6 +843,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -802,20 +854,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xSSE4.2 - KOKKOS_LDFLAGS += -xSSE4.2 - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -msse4.2 - KOKKOS_LDFLAGS += -msse4.2 - endif -endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX") @@ -1075,29 +1113,39 @@ endif # Figure out the architecture flag for ROCm. -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif @@ -1192,6 +1240,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) endif tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN") +tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY") + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) @@ -1214,7 +1264,22 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp") + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") @@ -1234,30 +1299,10 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) @@ -1368,11 +1413,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_TPL_LIBRARY_NAMES += hpx endif -# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning. -ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) -endif - # With Cygwin functions such as fdopen and fileno are not defined # when strict ansi is enabled. strict ansi gets enabled with -std=c++14 # though. So we hard undefine it here. Not sure if that has any bad side effects @@ -1426,6 +1466,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) else tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */") endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) + tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC") +else + tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */") +endif tmp := $(call desul_append_header, "") tmp := $(call desul_append_header, "$H""endif") @@ -1458,7 +1504,7 @@ include $(KOKKOS_PATH)/Makefile.targets kokkos-clean: rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \ KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \ - KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_PostInclude.hpp KokkosCore_Config_PostInclude.tmp KokkosCore_Config_SetupBackend.tmp + KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) ar cr libkokkos.a $(KOKKOS_OBJ_LINK) diff --git a/Makefile.targets b/Makefile.targets index 4e08a46c69..e8e429e027 100644 --- a/Makefile.targets +++ b/Makefile.targets @@ -20,8 +20,6 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp -Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp @@ -30,12 +28,12 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp -Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp @@ -80,8 +78,10 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) -Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp +Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp +Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -121,6 +121,3 @@ Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp endif - -Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/README.md b/README.md index 033346e956..c8c6f8f7cf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) +[![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)](https://kokkos.org) # Kokkos: Core Libraries @@ -10,48 +10,71 @@ hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. -**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** +**Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).** -For the complete documentation, click below: +Kokkos is a [Linux Foundation](https://linuxfoundation.org) project. -# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) - -# Learning about Kokkos +## Learning about Kokkos To start learning about Kokkos: -- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. +- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. -- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. +- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. -- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). +- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). -- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. +- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability. -For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. +## Obtaining Kokkos -For non-public questions send an email to: *crtrott(at)sandia.gov* +The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). + +The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01). + +```bash +curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +# Or with wget +wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +``` + +To clone the latest development version of Kokkos from GitHub: + +```bash +git clone -b develop https://github.com/kokkos/kokkos.git +``` -# Contributing to Kokkos +### Building Kokkos -Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. +To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). -# Requirements, Building and Installing +Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). + +You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`. + +## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/) + +## Support + +For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. + +For non-public questions send an email to: *crtrott(at)sandia.gov* -All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). +## Contributing -Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). +Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute. -# Citing Kokkos +## Citing Kokkos -Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). +Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html). -# License +## License -[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) +[![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. -The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or -[here](https://github.com/kokkos/kokkos/blob/master/LICENSE). +The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or +[here](https://github.com/kokkos/kokkos/blob/develop/LICENSE). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..93cf6e3663 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,12 @@ +# Reporting Security Issues + +To report a security issue, please email +[lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov) +and [crtrott@sandia.gov](mailto:crtrott@sandia.gov) +with a description of the issue, the steps you took to create the issue, +affected versions, and, if known, mitigations for the issue. + +Our vulnerability management team will respond within 5 working days of your +email. If the issue is confirmed as a vulnerability, we will open a +Security Advisory and acknowledge your contributions as part of it. This project +follows a 90 day disclosure timeline. diff --git a/Spack.md b/Spack.md index 79606c259d..06c763a64e 100644 --- a/Spack.md +++ b/Spack.md @@ -159,7 +159,6 @@ If you don't specify a CUDA build variant in a `packages.yaml` and you build you > spack install superscience ```` you may end up just getting the default Kokkos (i.e. Serial). -Some examples are included in the `config/yaml` folder for common platforms. Before running `spack install ` we recommend running `spack spec ` to confirm your dependency tree is correct. For example, with Kokkos Kernels: ````bash diff --git a/algorithms/CMakeLists.txt b/algorithms/CMakeLists.txt index ab557ab66a..368984647e 100644 --- a/algorithms/CMakeLists.txt +++ b/algorithms/CMakeLists.txt @@ -2,6 +2,6 @@ IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() # FIXME_OPENACC: temporarily disabled due to unimplemented features -IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) +IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index 1695778947..b490caca62 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -30,5 +30,5 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms ${CMAKE_CURRENT_SOURCE_DIR} ) - - +KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST) +KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL) diff --git a/algorithms/src/Kokkos_NestedSort.hpp b/algorithms/src/Kokkos_NestedSort.hpp index 4c8be792d8..18e0674efe 100644 --- a/algorithms/src/Kokkos_NestedSort.hpp +++ b/algorithms/src/Kokkos_NestedSort.hpp @@ -14,175 +14,17 @@ // //@HEADER -#ifndef KOKKOS_NESTEDSORT_HPP_ -#define KOKKOS_NESTEDSORT_HPP_ - -#include -#include -#include - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// true for TeamVectorRange, false for ThreadVectorRange -template -struct NestedRange {}; - -// Specialization for team-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::TeamVectorRange(t, len); - } - template - KOKKOS_FUNCTION static void barrier(const TeamMember& t) { - t.team_barrier(); - } -}; - -// Specialization for thread-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::ThreadVectorRange(t, len); - } - // Barrier is no-op, as vector lanes of a thread are implicitly synchronized - // after parallel region - template - KOKKOS_FUNCTION static void barrier(const TeamMember&) {} -}; - -// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. -// This only takes the NestedRange instance for template arg deduction. -template -KOKKOS_INLINE_FUNCTION void sort_nested_impl( - const TeamMember& t, const KeyViewType& keyView, - [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, - const NestedRange) { - using SizeType = typename KeyViewType::size_type; - using KeyType = typename KeyViewType::non_const_value_type; - using Range = NestedRange; - SizeType n = keyView.extent(0); - SizeType npot = 1; - SizeType levels = 0; - // FIXME: ceiling power-of-two is a common thing to need - make it a utility - while (npot < n) { - levels++; - npot <<= 1; - } - for (SizeType i = 0; i < levels; i++) { - for (SizeType j = 0; j <= i; j++) { - // n/2 pairs of items are compared in parallel - Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { - // How big are the brown/pink boxes? - // (Terminology comes from Wikipedia diagram) - // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg - SizeType boxSize = SizeType(2) << (i - j); - // Which box contains this thread? - SizeType boxID = k >> (i - j); // k * 2 / boxSize; - SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize - SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; - SizeType elem1 = boxStart + boxOffset; - // In first phase (j == 0, brown box): within a box, compare with the - // opposite value in the box. - // In later phases (j > 0, pink box): within a box, compare with fixed - // distance (boxSize / 2) apart. - SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) - : (elem1 + boxSize / 2); - if (elem2 < n) { - KeyType key1 = keyView(elem1); - KeyType key2 = keyView(elem2); - if (comp(key2, key1)) { - keyView(elem1) = key2; - keyView(elem2) = key1; - if constexpr (!std::is_same_v) { - Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); - } - } - } - }); - Range::barrier(t); - } - } -} - -} // namespace Impl - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} +#ifndef KOKKOS_NESTED_SORT_HPP_ +#define KOKKOS_NESTED_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" -} // namespace Experimental -} // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif #endif diff --git a/algorithms/src/Kokkos_Random.hpp b/algorithms/src/Kokkos_Random.hpp index abb028d28e..7df12b8518 100644 --- a/algorithms/src/Kokkos_Random.hpp +++ b/algorithms/src/Kokkos_Random.hpp @@ -849,18 +849,17 @@ class Random_XorShift64 { return drand(end - start) + start; } - // Marsaglia polar method for drawing a standard normal distributed random + // Box-muller method for drawing a standard normal distributed random // number KOKKOS_INLINE_FUNCTION double normal() { - double S = 2.0; - double U; - while (S >= 1.0) { - U = 2.0 * drand() - 1.0; - const double V = 2.0 * drand() - 1.0; - S = U * U + V * V; - } - return U * std::sqrt(-2.0 * std::log(S) / S); + constexpr auto two_pi = 2 * Kokkos::numbers::pi_v; + + const double u = drand(); + const double v = drand(); + const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u)); + const double theta = v * two_pi; + return r * Kokkos::cos(theta); } KOKKOS_INLINE_FUNCTION @@ -956,6 +955,8 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_, 0) = state.state_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; @@ -1092,18 +1093,17 @@ class Random_XorShift1024 { return drand(end - start) + start; } - // Marsaglia polar method for drawing a standard normal distributed random + // Box-muller method for drawing a standard normal distributed random // number KOKKOS_INLINE_FUNCTION double normal() { - double S = 2.0; - double U; - while (S >= 1.0) { - U = 2.0 * drand() - 1.0; - const double V = 2.0 * drand() - 1.0; - S = U * U + V * V; - } - return U * std::sqrt(-2.0 * std::log(S) / S); + constexpr auto two_pi = 2 * Kokkos::numbers::pi_v; + + const double u = drand(); + const double v = drand(); + const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u)); + const double theta = v * two_pi; + return r * Kokkos::cos(theta); } KOKKOS_INLINE_FUNCTION @@ -1208,7 +1208,9 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - p_(state.state_idx_, 0) = state.p_; + p_(state.state_idx_, 0) = state.p_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; @@ -1541,13 +1543,23 @@ template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { - fill_random(typename ViewType::execution_space{}, a, g, begin, end); + Kokkos::fence( + "fill_random: fence before since no execution space instance provided"); + typename ViewType::execution_space exec; + fill_random(exec, a, g, begin, end); + exec.fence( + "fill_random: fence after since no execution space instance provided"); } template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { - fill_random(typename ViewType::execution_space{}, a, g, 0, range); + Kokkos::fence( + "fill_random: fence before since no execution space instance provided"); + typename ViewType::execution_space exec; + fill_random(exec, a, g, 0, range); + exec.fence( + "fill_random: fence after since no execution space instance provided"); } } // namespace Kokkos diff --git a/algorithms/src/Kokkos_Sort.hpp b/algorithms/src/Kokkos_Sort.hpp index 10f9ad6462..136b4ec82d 100644 --- a/algorithms/src/Kokkos_Sort.hpp +++ b/algorithms/src/Kokkos_Sort.hpp @@ -21,762 +21,10 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif -#include -#include -#include -#include - -#if defined(KOKKOS_ENABLE_CUDA) - -// Workaround for `Instruction 'shfl' without '.sync' is not supported on -// .target sm_70 and higher from PTX ISA version 6.4`. -// Also see https://github.com/NVIDIA/cub/pull/170. -#if !defined(CUB_USE_COOPERATIVE_GROUPS) -#define CUB_USE_COOPERATIVE_GROUPS -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" - -#if defined(KOKKOS_COMPILER_CLANG) -// Some versions of Clang fail to compile Thrust, failing with errors like -// this: -// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: -// error: use of undeclared identifier 'va_printf' -// The exact combination of versions for Clang and Thrust (or CUDA) for this -// failure was not investigated, however even very recent version combination -// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. -// -// Defining _CubLog here locally allows us to avoid that code path, however -// disabling some debugging diagnostics -#pragma push_macro("_CubLog") -#ifdef _CubLog -#undef _CubLog -#endif -#define _CubLog -#include -#include -#pragma pop_macro("_CubLog") -#else -#include -#include -#endif - -#pragma GCC diagnostic pop - -#endif - -#if defined(KOKKOS_ENABLE_ONEDPL) -#include -#include -#endif - -namespace Kokkos { - -namespace Impl { - -template -struct CopyOp; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - dst(i_dst) = src(i_src); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < dst.extent(1); j++) - for (int k = 0; k < dst.extent(2); k++) - dst(i_dst, j, k) = src(i_src, j, k); - } -}; -} // namespace Impl - -//---------------------------------------------------------------------------- - -template -class BinSort { - public: - template - struct copy_functor { - using src_view_type = typename SrcViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - src_view_type src_values; - int dst_offset; - - copy_functor(DstViewType const& dst_values_, int const& dst_offset_, - SrcViewType const& src_values_) - : dst_values(dst_values_), - src_values(src_values_), - dst_offset(dst_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i + dst_offset, src_values, i); - } - }; - - template - struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access - // otherwise can only use the constant type. - - using src_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View -#endif - >, - typename SrcViewType::const_type>; - - using perm_view_type = typename PermuteViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - perm_view_type sort_order; - src_view_type src_values; - int src_offset; - - copy_permute_functor(DstViewType const& dst_values_, - PermuteViewType const& sort_order_, - SrcViewType const& src_values_, int const& src_offset_) - : dst_values(dst_values_), - sort_order(sort_order_), - src_values(src_values_), - src_offset(src_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); - } - }; - - // Naming this alias "execution_space" would be problematic since it would be - // considered as execution space for the various functors which might use - // another execution space through sort() or create_permute_vector(). - using exec_space = typename Space::execution_space; - using bin_op_type = BinSortOp; - - struct bin_count_tag {}; - struct bin_offset_tag {}; - struct bin_binning_tag {}; - struct bin_sort_bins_tag {}; - - public: - using size_type = SizeType; - using value_type = size_type; - - using offset_type = Kokkos::View; - using bin_count_type = Kokkos::View; - - using const_key_view_type = typename KeyViewType::const_type; - - // If a Kokkos::View then can generate constant random access - // otherwise can only use the constant type. - - using const_rnd_key_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View >, - const_key_view_type>; - - using non_const_key_scalar = typename KeyViewType::non_const_value_type; - using const_key_scalar = typename KeyViewType::const_value_type; - - using bin_count_atomic_type = - Kokkos::View >; - - private: - const_key_view_type keys; - const_rnd_key_view_type keys_rnd; - - public: - BinSortOp bin_op; - offset_type bin_offsets; - bin_count_atomic_type bin_count_atomic; - bin_count_type bin_count_const; - offset_type sort_order; - - int range_begin; - int range_end; - bool sort_within_bins; - - public: -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinSort() = default; -#else - BinSort() = delete; -#endif - - //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to - // sort within bins (default false) - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - int range_begin_, int range_end_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : keys(keys_), - keys_rnd(keys_), - bin_op(bin_op_), - bin_offsets(), - bin_count_atomic(), - bin_count_const(), - sort_order(), - range_begin(range_begin_), - range_end(range_end_), - sort_within_bins(sort_within_bins_) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - if (bin_op.max_bins() <= 0) - Kokkos::abort( - "The number of bins in the BinSortOp object must be greater than 0!"); - bin_count_atomic = Kokkos::View( - "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), - bin_op.max_bins()); - sort_order = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sort_order"), - range_end - range_begin); - } - - BinSort(const_key_view_type keys_, int range_begin_, int range_end_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, - sort_within_bins_) {} - - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} - - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} - - //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count - // array. Can be called again if keys changed - template - void create_permute_vector(const ExecutionSpace& exec) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - - const size_t len = range_end - range_begin; - Kokkos::parallel_for( - "Kokkos::Sort::BinCount", - Kokkos::RangePolicy(exec, 0, len), - *this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - - Kokkos::deep_copy(exec, bin_count_atomic, 0); - Kokkos::parallel_for( - "Kokkos::Sort::BinBinning", - Kokkos::RangePolicy(exec, 0, len), - *this); - - if (sort_within_bins) - Kokkos::parallel_for( - "Kokkos::Sort::BinSort", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - } - - // Create the permutation vector, the bin_offset array and the bin_count - // array. Can be called again if keys changed - void create_permute_vector() { - Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); - exec_space e{}; - create_permute_vector(e); - e.fence("Kokkos::Binsort::create_permute_vector: after"); - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(const ExecutionSpace& exec, ValuesViewType const& values, - int values_range_begin, int values_range_end) const { - if (values.extent(0) == 0) { - return; - } - - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename ValuesViewType::memory_space>::accessible, - "The provided execution space must be able to access the memory space " - "of the View argument!"); - - const size_t len = range_end - range_begin; - const size_t values_len = values_range_end - values_range_begin; - if (len != values_len) { - Kokkos::abort( - "BinSort::sort: values range length != permutation vector length"); - } - - using scratch_view_type = - Kokkos::View; - scratch_view_type sorted_values( - view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 2 ? values.extent(2) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? values.extent(7) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG); - - { - copy_permute_functor - functor(sorted_values, sort_order, values, - values_range_begin - range_begin); - - parallel_for("Kokkos::Sort::CopyPermute", - Kokkos::RangePolicy(exec, 0, len), functor); - } - - { - copy_functor functor( - values, range_begin, sorted_values); - - parallel_for("Kokkos::Sort::Copy", - Kokkos::RangePolicy(exec, 0, len), functor); - } - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(ValuesViewType const& values, int values_range_begin, - int values_range_end) const { - Kokkos::fence("Kokkos::Binsort::sort: before"); - exec_space exec; - sort(exec, values, values_range_begin, values_range_end); - exec.fence("Kokkos::BinSort:sort: after"); - } - - template - void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { - this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - template - void sort(ValuesViewType const& values) const { - this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - // Get the permutation vector - KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order; } - - // Get the start offsets for each bin - KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets; } - - // Get the count for each bin - KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const { return bin_count_const; } - - public: - KOKKOS_INLINE_FUNCTION - void operator()(const bin_count_tag& /*tag*/, const int i) const { - const int j = range_begin + i; - bin_count_atomic(bin_op.bin(keys, j))++; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_offset_tag& /*tag*/, const int i, - value_type& offset, const bool& final) const { - if (final) { - bin_offsets(i) = offset; - } - offset += bin_count_const(i); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_binning_tag& /*tag*/, const int i) const { - const int j = range_begin + i; - const int bin = bin_op.bin(keys, j); - const int count = bin_count_atomic(bin)++; - - sort_order(bin_offsets(bin) + count) = j; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { - auto bin_size = bin_count_const(i); - if (bin_size <= 1) return; - constexpr bool use_std_sort = - std::is_same_v; - int lower_bound = bin_offsets(i); - int upper_bound = lower_bound + bin_size; - // Switching to std::sort for more than 10 elements has been found - // reasonable experimentally. - if (use_std_sort && bin_size > 10) { - if constexpr (use_std_sort) { - std::sort(&sort_order(lower_bound), &sort_order(upper_bound), - [this](int p, int q) { return bin_op(keys_rnd, p, q); }); - } - } else { - for (int k = lower_bound + 1; k < upper_bound; ++k) { - int old_idx = sort_order(k); - int j = k - 1; - while (j >= lower_bound) { - int new_idx = sort_order(j); - if (!bin_op(keys_rnd, old_idx, new_idx)) break; - sort_order(j + 1) = new_idx; - --j; - } - sort_order(j + 1) = old_idx; - } - } - } -}; - -//---------------------------------------------------------------------------- - -template -struct BinOp1D { - int max_bins_ = {}; - double mul_ = {}; - double min_ = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp1D() = default; -#else - BinOp1D() = delete; -#endif - - // Construct BinOp with number of bins, minimum value and maximum value - BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max) - : max_bins_(max_bins__ + 1), - // Cast to double to avoid possible overflow when using integer - mul_(static_cast(max_bins__) / - (static_cast(max) - static_cast(min))), - min_(static_cast(min)) { - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. - if (std::is_integral::value && - (static_cast(max) - static_cast(min)) <= - static_cast(max_bins__)) { - mul_ = 1.; - } - } - - // Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return static_cast(mul_ * (static_cast(keys(i)) - min_)); - } - - // Return maximum bin index + 1 - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_; } - - // Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - return keys(i1) < keys(i2); - } -}; - -template -struct BinOp3D { - int max_bins_[3] = {}; - double mul_[3] = {}; - double min_[3] = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp3D() = default; -#else - BinOp3D() = delete; -#endif - - BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[]) { - max_bins_[0] = max_bins__[0]; - max_bins_[1] = max_bins__[1]; - max_bins_[2] = max_bins__[2]; - mul_[0] = static_cast(max_bins__[0]) / - (static_cast(max[0]) - static_cast(min[0])); - mul_[1] = static_cast(max_bins__[1]) / - (static_cast(max[1]) - static_cast(min[1])); - mul_[2] = static_cast(max_bins__[2]) / - (static_cast(max[2]) - static_cast(min[2])); - min_[0] = static_cast(min[0]); - min_[1] = static_cast(min[1]); - min_[2] = static_cast(min[2]); - } - - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + - int(mul_[1] * (keys(i, 1) - min_[1]))) * - max_bins_[2]) + - int(mul_[2] * (keys(i, 2) - min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } - - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - if (keys(i1, 0) > keys(i2, 0)) - return true; - else if (keys(i1, 0) == keys(i2, 0)) { - if (keys(i1, 1) > keys(i2, 1)) - return true; - else if (keys(i1, 1) == keys(i2, 1)) { - if (keys(i1, 2) > keys(i2, 2)) return true; - } - } - return false; - } -}; - -namespace Impl { - -template -struct min_max_functor { - using minmax_scalar = - Kokkos::MinMaxScalar; - - ViewType view; - min_max_functor(const ViewType& view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i, minmax_scalar& minmax) const { - if (view(i) < minmax.min_val) minmax.min_val = view(i); - if (view(i) > minmax.max_val) minmax.max_val = view(i); - } -}; - -} // namespace Impl - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (!SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace& exec, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", - Kokkos::RangePolicy( - exec, 0, view.extent(0)), - Impl::min_max_functor(view), reducer); - if (result.min_val == result.max_val) return; - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. - bool sort_in_bins = true; - // TODO: figure out better max_bins then this ... - int64_t max_bins = view.extent(0) / 2; - if (std::is_integral::value) { - // Cast to double to avoid possible overflow when using integer - auto const max_val = static_cast(result.max_val); - auto const min_val = static_cast(result.min_val); - // using 10M as the cutoff for special behavior (roughly 40MB for the count - // array) - if ((max_val - min_val) < 10000000) { - max_bins = max_val - min_val + 1; - sort_in_bins = false; - } - } - if (std::is_floating_point::value) { - KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - - static_cast(result.min_val))); - } - - BinSort bin_sort( - view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view); -} - -#if defined(KOKKOS_ENABLE_ONEDPL) -template -void sort(const Experimental::SYCL& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - static_assert(SpaceAccessibility::accessible, - "SYCL execution space is not able to access the memory space " - "of the View argument!"); - - auto queue = space.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); - - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. - static_assert( - ViewType::rank == 1 && - (std::is_same::value || - std::is_same::value), - "SYCL sort only supports contiguous 1D Views."); - const int n = view.extent(0); - oneapi::dpl::sort(policy, view.data(), view.data() + n); -} -#endif - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace&, const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - std::sort(first, last); -} - -#if defined(KOKKOS_ENABLE_CUDA) -template -void sort(const Cuda& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - const auto exec = thrust::cuda::par.on(space.cuda_stream()); - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - thrust::sort(exec, first, last); -} -#endif - -template -void sort(ViewType const& view) { - Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view); - exec.fence("Kokkos::sort: fence after sorting"); -} - -template -std::enable_if_t::value> sort( - const ExecutionSpace& exec, ViewType view, size_t const begin, - size_t const end) { - if (view.extent(0) == 0) { - return; - } - - using range_policy = Kokkos::RangePolicy; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - - parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), - Impl::min_max_functor(view), reducer); - - if (result.min_val == result.max_val) return; - - BinSort bin_sort( - exec, view, begin, end, - CompType((end - begin) / 2, result.min_val, result.max_val), true); - - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view, begin, end); -} - -template -void sort(ViewType view, size_t const begin, size_t const end) { - Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view, begin, end); - exec.fence("Kokkos::Sort: fence after sorting"); -} - -} // namespace Kokkos +#include "sorting/Kokkos_BinSortPublicAPI.hpp" +#include "sorting/Kokkos_SortPublicAPI.hpp" +#include "sorting/Kokkos_SortByKeyPublicAPI.hpp" +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/algorithms/src/Kokkos_StdAlgorithms.hpp b/algorithms/src/Kokkos_StdAlgorithms.hpp index 436ae0d10b..b532a774e1 100644 --- a/algorithms/src/Kokkos_StdAlgorithms.hpp +++ b/algorithms/src/Kokkos_StdAlgorithms.hpp @@ -35,7 +35,6 @@ // following the std classification. // modifying ops -#include "std_algorithms/Kokkos_Swap.hpp" #include "std_algorithms/Kokkos_IterSwap.hpp" // non-modifying sequence diff --git a/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp new file mode 100644 index 0000000000..73e751f572 --- /dev/null +++ b/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ +#define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ + +#include +#include + +namespace Kokkos { + +template +struct BinOp1D { + int max_bins_ = {}; + double mul_ = {}; + double min_ = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif + + // Construct BinOp with number of bins, minimum value and maximum value + BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + // Cast to double to avoid possible overflow when using integer + mul_(static_cast(max_bins__) / + (static_cast(max) - static_cast(min))), + min_(static_cast(min)) { + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + if (std::is_integral::value && + (static_cast(max) - static_cast(min)) <= + static_cast(max_bins__)) { + mul_ = 1.; + } + } + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return static_cast(mul_ * (static_cast(keys(i)) - min_)); + } + + // Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_; } + + // Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); + } +}; + +template +struct BinOp3D { + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif + + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[]) { + max_bins_[0] = max_bins__[0]; + max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; + mul_[0] = static_cast(max_bins__[0]) / + (static_cast(max[0]) - static_cast(min[0])); + mul_[1] = static_cast(max_bins__[1]) / + (static_cast(max[1]) - static_cast(min[1])); + mul_[2] = static_cast(max_bins__[2]) / + (static_cast(max[2]) - static_cast(min[2])); + min_[0] = static_cast(min[0]); + min_[1] = static_cast(min[1]); + min_[2] = static_cast(min[2]); + } + + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; + } + } + return false; + } +}; + +} // namespace Kokkos +#endif diff --git a/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp b/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp new file mode 100644 index 0000000000..c399279fe4 --- /dev/null +++ b/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp @@ -0,0 +1,410 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ +#define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ + +#include "Kokkos_BinOpsPublicAPI.hpp" +#include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" +#include +#include + +namespace Kokkos { + +template +class BinSort { + public: + template + struct copy_functor { + using src_view_type = typename SrcViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + src_view_type src_values; + int dst_offset; + + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); + } + }; + + template + struct copy_permute_functor { + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. + + using src_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View= 230700) + , + Kokkos::MemoryTraits +#endif + >, + typename SrcViewType::const_type>; + + using perm_view_type = typename PermuteViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); + } + }; + + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + + public: + using size_type = SizeType; + using value_type = size_type; + + using offset_type = Kokkos::View; + using bin_count_type = Kokkos::View; + + using const_key_view_type = typename KeyViewType::const_type; + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. + + using const_rnd_key_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>; + + using non_const_key_scalar = typename KeyViewType::non_const_value_type; + using const_key_scalar = typename KeyViewType::const_value_type; + + using bin_count_atomic_type = + Kokkos::View >; + + private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; + + int range_begin; + int range_end; + bool sort_within_bins; + + public: +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif + + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + int range_begin_, int range_end_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + range_end - range_begin); + } + + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, + sort_within_bins_) {} + + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} + + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} + + //---------------------------------------- + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed + template + void create_permute_vector(const ExecutionSpace& exec) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(exec, 0, len), + *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(exec, bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(exec, 0, len), + *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + } + + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed + void create_permute_vector() { + Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); + exec_space e{}; + create_permute_vector(e); + e.fence("Kokkos::Binsort::create_permute_vector: after"); + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(const ExecutionSpace& exec, ValuesViewType const& values, + int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + + const size_t len = range_end - range_begin; + const size_t values_len = values_range_end - values_range_begin; + if (len != values_len) { + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); + } + + using scratch_view_type = + Kokkos::View; + scratch_view_type sorted_values( + view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + + { + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(exec, 0, len), functor); + } + + { + copy_functor functor( + values, range_begin, sorted_values); + + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(exec, 0, len), functor); + } + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + Kokkos::fence("Kokkos::Binsort::sort: before"); + exec_space exec; + sort(exec, values, values_range_begin, values_range_end); + exec.fence("Kokkos::BinSort:sort: after"); + } + + template + void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { + this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order; } + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets; } + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const { return bin_count_const; } + + public: + KOKKOS_INLINE_FUNCTION + void operator()(const bin_count_tag& /*tag*/, const int i) const { + const int j = range_begin + i; + bin_count_atomic(bin_op.bin(keys, j))++; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_offset_tag& /*tag*/, const int i, + value_type& offset, const bool& final) const { + if (final) { + bin_offsets(i) = offset; + } + offset += bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_binning_tag& /*tag*/, const int i) const { + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = j; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { + auto bin_size = bin_count_const(i); + if (bin_size <= 1) return; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + KOKKOS_IF_ON_HOST( + (std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); });)) + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; + } + sort_order(j + 1) = old_idx; + } + } + } +}; + +} // namespace Kokkos +#endif diff --git a/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp b/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp new file mode 100644 index 0000000000..dd468e0734 --- /dev/null +++ b/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp @@ -0,0 +1,100 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ +#define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ + +#include "impl/Kokkos_NestedSortImpl.hpp" +#include +#include + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp b/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp new file mode 100644 index 0000000000..fc73eccad6 --- /dev/null +++ b/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp @@ -0,0 +1,117 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ +#define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ + +#include "./impl/Kokkos_SortByKeyImpl.hpp" +#include +#include + +namespace Kokkos::Experimental { + +// --------------------------------------------------------------- +// basic overloads +// --------------------------------------------------------------- + +template +void sort_by_key( + const ExecutionSpace& exec, + const Kokkos::View& keys, + const Kokkos::View& values) { + // constraints + using KeysType = Kokkos::View; + using ValuesType = Kokkos::View; + ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys); + ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the keys View argument!"); + static_assert( + SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the values View argument!"); + + static_assert(KeysType::static_extent(0) == 0 || + ValuesType::static_extent(0) == 0 || + KeysType::static_extent(0) == ValuesType::static_extent(0)); + if (values.size() != keys.size()) + Kokkos::abort((std::string("values and keys extents must be the same. The " + "values extent is ") + + std::to_string(values.size()) + ", and the keys extent is " + + std::to_string(keys.size()) + ".") + .c_str()); + + if (keys.extent(0) <= 1) { + return; + } + + ::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys, + values); +} + +// --------------------------------------------------------------- +// overloads supporting a custom comparator +// --------------------------------------------------------------- + +template +void sort_by_key( + const ExecutionSpace& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + const ComparatorType& comparator) { + // constraints + using KeysType = Kokkos::View; + using ValuesType = Kokkos::View; + ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys); + ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the keys View argument!"); + static_assert( + SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the values View argument!"); + + static_assert(KeysType::static_extent(0) == 0 || + ValuesType::static_extent(0) == 0 || + KeysType::static_extent(0) == ValuesType::static_extent(0)); + if (values.size() != keys.size()) + Kokkos::abort((std::string("values and keys extents must be the same. The " + "values extent is ") + + std::to_string(values.size()) + ", and the keys extent is " + + std::to_string(keys.size()) + ".") + .c_str()); + + if (keys.extent(0) <= 1) { + return; + } + + ::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values, + comparator); +} + +} // namespace Kokkos::Experimental +#endif diff --git a/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp new file mode 100644 index 0000000000..308e9e3a00 --- /dev/null +++ b/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -0,0 +1,196 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_PUBLIC_API_HPP_ +#define KOKKOS_SORT_PUBLIC_API_HPP_ + +#include "./impl/Kokkos_SortImpl.hpp" +#include +#include +#include + +namespace Kokkos { + +// --------------------------------------------------------------- +// basic overloads +// --------------------------------------------------------------- + +template +void sort(const ExecutionSpace& exec, + const Kokkos::View& view) { + // constraints + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort without comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the " + "View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + exec.fence("Kokkos::sort without comparator use std::sort"); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); + } else { + Impl::sort_device_view_without_comparator(exec, view); + } +} + +template +void sort(const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::sort: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads supporting a custom comparator +// --------------------------------------------------------------- +template +void sort(const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // constraints + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + exec.fence("Kokkos::sort with comparator use std::sort"); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); + } else { + Impl::sort_device_view_with_comparator(exec, view, comparator); + } +} + +template +void sort(const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + Kokkos::fence("Kokkos::sort with comparator: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, comparator); + exec.fence("Kokkos::sort with comparator: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads for sorting a view with a subrange +// specified via integers begin, end +// --------------------------------------------------------------- + +template +std::enable_if_t::value> sort( + const ExecutionSpace& exec, ViewType view, size_t const begin, + size_t const end) { + // view must be rank-1 because the Impl::min_max_functor + // used below only works for rank-1 views for now + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + using range_policy = Kokkos::RangePolicy; + using CompType = BinOp1D; + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), + Impl::min_max_functor(view), reducer); + + if (result.min_val == result.max_val) return; + + BinSort bin_sort( + exec, view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); + + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view, begin, end); +} + +template +void sort(ViewType view, size_t const begin, size_t const end) { + // same constraints as the overload above which this gets dispatched to + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, begin, end); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +} // namespace Kokkos +#endif diff --git a/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp b/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp new file mode 100644 index 0000000000..07f5926d82 --- /dev/null +++ b/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp @@ -0,0 +1,61 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ +#define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +struct CopyOp; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp b/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp new file mode 100644 index 0000000000..2fe58272d9 --- /dev/null +++ b/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp @@ -0,0 +1,114 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ +#define KOKKOS_NESTED_SORT_IMPL_HPP_ + +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// true for TeamVectorRange, false for ThreadVectorRange +template +struct NestedRange {}; + +// Specialization for team-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::TeamVectorRange(t, len); + } + template + KOKKOS_FUNCTION static void barrier(const TeamMember& t) { + t.team_barrier(); + } +}; + +// Specialization for thread-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::ThreadVectorRange(t, len); + } + // Barrier is no-op, as vector lanes of a thread are implicitly synchronized + // after parallel region + template + KOKKOS_FUNCTION static void barrier(const TeamMember&) {} +}; + +// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. +// This only takes the NestedRange instance for template arg deduction. +template +KOKKOS_INLINE_FUNCTION void sort_nested_impl( + const TeamMember& t, const KeyViewType& keyView, + [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, + const NestedRange) { + using SizeType = typename KeyViewType::size_type; + using KeyType = typename KeyViewType::non_const_value_type; + using Range = NestedRange; + SizeType n = keyView.extent(0); + SizeType npot = 1; + SizeType levels = 0; + // FIXME: ceiling power-of-two is a common thing to need - make it a utility + while (npot < n) { + levels++; + npot <<= 1; + } + for (SizeType i = 0; i < levels; i++) { + for (SizeType j = 0; j <= i; j++) { + // n/2 pairs of items are compared in parallel + Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { + // How big are the brown/pink boxes? + // (Terminology comes from Wikipedia diagram) + // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg + SizeType boxSize = SizeType(2) << (i - j); + // Which box contains this thread? + SizeType boxID = k >> (i - j); // k * 2 / boxSize; + SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize + SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; + SizeType elem1 = boxStart + boxOffset; + // In first phase (j == 0, brown box): within a box, compare with the + // opposite value in the box. + // In later phases (j > 0, pink box): within a box, compare with fixed + // distance (boxSize / 2) apart. + SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) + : (elem1 + boxSize / 2); + if (elem2 < n) { + KeyType key1 = keyView(elem1); + KeyType key2 = keyView(elem2); + if (comp(key2, key1)) { + keyView(elem1) = key2; + keyView(elem2) = key1; + if constexpr (!std::is_same_v) { + Kokkos::kokkos_swap(valueView(elem1), valueView(elem2)); + } + } + } + }); + Range::barrier(t); + } + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp new file mode 100644 index 0000000000..f11f807048 --- /dev/null +++ b/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -0,0 +1,424 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ +#define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ + +#include + +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. +// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) && \ + (ONEDPL_VERSION_MAJOR > 2022 || \ + (ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2)) +#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#include +#include +#endif + +namespace Kokkos::Impl { + +template +constexpr inline bool is_admissible_to_kokkos_sort_by_key = + ::Kokkos::is_view::value&& T::rank() == 1 && + (std::is_same::value || + std::is_same::value || + std::is_same::value); + +template +KOKKOS_INLINE_FUNCTION constexpr void +static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) { + static_assert(is_admissible_to_kokkos_sort_by_key, + "Kokkos::sort_by_key only accepts 1D values View with " + "LayoutRight, LayoutLeft or LayoutStride."); +} + +// For the fallback implementation for sort_by_key using Kokkos::sort, we need +// to consider if Kokkos::sort defers to the fallback implementation that copies +// the array to the host and uses std::sort, see +// copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If +// sort_on_device_v is true, we assume that std::sort doesn't copy data. +// Otherwise, we manually copy all data to the host and provide Kokkos::sort +// with a host execution space. +template +inline constexpr bool sort_on_device_v = false; + +#if defined(KOKKOS_ENABLE_CUDA) +template +inline constexpr bool sort_on_device_v = true; + +template +void sort_by_key_cudathrust( + const Kokkos::Cuda& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + MaybeComparator&&... maybeComparator) { + const auto policy = thrust::cuda::par.on(exec.cuda_stream()); + auto keys_first = ::Kokkos::Experimental::begin(keys); + auto keys_last = ::Kokkos::Experimental::end(keys); + auto values_first = ::Kokkos::Experimental::begin(values); + thrust::sort_by_key(policy, keys_first, keys_last, values_first, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +inline constexpr bool sort_on_device_v = true; + +template +void sort_by_key_rocthrust( + const Kokkos::HIP& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + MaybeComparator&&... maybeComparator) { + const auto policy = thrust::hip::par.on(exec.hip_stream()); + auto keys_first = ::Kokkos::Experimental::begin(keys); + auto keys_last = ::Kokkos::Experimental::end(keys); + auto values_first = ::Kokkos::Experimental::begin(values); + thrust::sort_by_key(policy, keys_first, keys_last, values_first, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +inline constexpr bool sort_on_device_v = + std::is_same_v || + std::is_same_v; + +#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +template +void sort_by_key_onedpl( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + MaybeComparator&&... maybeComparator) { + if (keys.stride(0) != 1 && values.stride(0) != 1) { + Kokkos::abort( + "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1."); + } + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + auto queue = exec.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + const int n = keys.extent(0); + oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(), + std::forward(maybeComparator)...); +} +#endif +#endif + +template +void applyPermutation(const ExecutionSpace& space, + const PermutationView& permutation, + const ViewType& view) { + static_assert(std::is_integral::value); + + auto view_copy = Kokkos::create_mirror( + Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{}, + Kokkos::WithoutInitializing), + view); + Kokkos::deep_copy(space, view_copy, view); + Kokkos::parallel_for( + "Kokkos::sort_by_key_via_sort::permute_" + view.label(), + Kokkos::RangePolicy(space, 0, view.extent(0)), + KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); }); +} + +// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with +// variadic templates (sort_by_key_via_sort). Switch to using functors instead. +template +struct IotaFunctor { + Permute _permute; + KOKKOS_FUNCTION void operator()(int i) const { _permute(i) = i; } +}; +template +struct LessFunctor { + Keys _keys; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return _keys(i) < _keys(j); + } +}; + +// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us +// "For this host platform/dialect, an extended lambda cannot be defined inside +// the 'if' or 'else' block of a constexpr if statement" +template +struct KeyComparisonFunctor { + Keys m_keys; + Comparator m_comparator; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return m_comparator(m_keys(i), m_keys(j)); + } +}; + +template +void sort_by_key_via_sort( + const ExecutionSpace& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + MaybeComparator&&... maybeComparator) { + static_assert(sizeof...(MaybeComparator) <= 1); + + auto const n = keys.size(); + + Kokkos::View permute( + Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, + "Kokkos::sort_by_key_via_sort::permute"), + n); + + // iota + Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota", + Kokkos::RangePolicy(exec, 0, n), + IotaFunctor{permute}); + + using Layout = + typename Kokkos::View::array_layout; + if constexpr (!sort_on_device_v) { + auto host_keys = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing), + keys); + auto host_permute = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing), + permute); + Kokkos::deep_copy(exec, host_keys, keys); + Kokkos::deep_copy(exec, host_permute, permute); + + exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort"); + Kokkos::DefaultHostExecutionSpace host_exec; + + if constexpr (sizeof...(MaybeComparator) == 0) { + Kokkos::sort(host_exec, host_permute, + LessFunctor{host_keys}); + } else { + auto keys_comparator = + std::get<0>(std::tuple(maybeComparator...)); + Kokkos::sort( + host_exec, host_permute, + KeyComparisonFunctor{ + host_keys, keys_comparator}); + } + host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort"); + Kokkos::deep_copy(exec, permute, host_permute); + } else { +#ifdef KOKKOS_ENABLE_SYCL + auto* raw_keys_in_comparator = keys.data(); + auto stride = keys.stride(0); + if constexpr (sizeof...(MaybeComparator) == 0) { + Kokkos::sort( + exec, permute, KOKKOS_LAMBDA(int i, int j) { + return raw_keys_in_comparator[i * stride] < + raw_keys_in_comparator[j * stride]; + }); + } else { + auto keys_comparator = + std::get<0>(std::tuple(maybeComparator...)); + Kokkos::sort( + exec, permute, KOKKOS_LAMBDA(int i, int j) { + return keys_comparator(raw_keys_in_comparator[i * stride], + raw_keys_in_comparator[j * stride]); + }); + } +#else + if constexpr (sizeof...(MaybeComparator) == 0) { + Kokkos::sort(exec, permute, LessFunctor{keys}); + } else { + auto keys_comparator = + std::get<0>(std::tuple(maybeComparator...)); + Kokkos::sort( + exec, permute, + KeyComparisonFunctor{ + keys, keys_comparator}); + } +#endif + } + + applyPermutation(exec, permute, keys); + applyPermutation(exec, permute, values); +} + +// ------------------------------------------------------ +// +// specialize cases for sorting by key without comparator +// +// ------------------------------------------------------ + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_by_key_device_view_without_comparator( + const Kokkos::Cuda& exec, + const Kokkos::View& keys, + const Kokkos::View& values) { + sort_by_key_cudathrust(exec, keys, values); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_by_key_device_view_without_comparator( + const Kokkos::HIP& exec, + const Kokkos::View& keys, + const Kokkos::View& values) { + sort_by_key_rocthrust(exec, keys, values); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_by_key_device_view_without_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& keys, + const Kokkos::View& values) { +#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY + if (keys.stride(0) == 1 && values.stride(0) == 1) + sort_by_key_onedpl(exec, keys, values); + else +#endif + sort_by_key_via_sort(exec, keys, values); +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_by_key_device_view_without_comparator( + const ExecutionSpace& exec, + const Kokkos::View& keys, + const Kokkos::View& values) { + sort_by_key_via_sort(exec, keys, values); +} + +// --------------------------------------------------- +// +// specialize cases for sorting by key with comparator +// +// --------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_by_key_device_view_with_comparator( + const Kokkos::Cuda& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + const ComparatorType& comparator) { + sort_by_key_cudathrust(exec, keys, values, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_by_key_device_view_with_comparator( + const Kokkos::HIP& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + const ComparatorType& comparator) { + sort_by_key_rocthrust(exec, keys, values, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_by_key_device_view_with_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + const ComparatorType& comparator) { +#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY + if (keys.stride(0) == 1 && values.stride(0) == 1) + sort_by_key_onedpl(exec, keys, values, comparator); + else +#endif + sort_by_key_via_sort(exec, keys, values, comparator); +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_by_key_device_view_with_comparator( + const ExecutionSpace& exec, + const Kokkos::View& keys, + const Kokkos::View& values, + const ComparatorType& comparator) { + sort_by_key_via_sort(exec, keys, values, comparator); +} + +#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY + +} // namespace Kokkos::Impl +#endif diff --git a/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp new file mode 100644 index 0000000000..0894622891 --- /dev/null +++ b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -0,0 +1,416 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ +#define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ + +#include "../Kokkos_BinOpsPublicAPI.hpp" +#include "../Kokkos_BinSortPublicAPI.hpp" +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. +// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + +namespace Kokkos { +namespace Impl { + +template +struct better_off_calling_std_sort : std::false_type {}; + +#if defined KOKKOS_ENABLE_SERIAL +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_OPENMP +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_THREADS +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_HPX +template <> +struct better_off_calling_std_sort : std::true_type { +}; +#endif + +template +inline constexpr bool better_off_calling_std_sort_v = + better_off_calling_std_sort::value; + +template +struct min_max_functor { + using minmax_scalar = + Kokkos::MinMaxScalar; + + ViewType view; + min_max_functor(const ViewType& view_) : view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); + } +}; + +template +void sort_via_binsort(const ExecutionSpace& exec, + const Kokkos::View& view) { + // Although we are using BinSort below, which could work on rank-2 views, + // for now view must be rank-1 because the min_max_functor + // used below only works for rank-1 views + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + exec, 0, view.extent(0)), + min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + bool sort_in_bins = true; + // TODO: figure out better max_bins then this ... + int64_t max_bins = view.extent(0) / 2; + if (std::is_integral::value) { + // Cast to double to avoid possible overflow when using integer + auto const max_val = static_cast(result.max_val); + auto const min_val = static_cast(result.min_val); + // using 10M as the cutoff for special behavior (roughly 40MB for the count + // array) + if ((max_val - min_val) < 10000000) { + max_bins = max_val - min_val + 1; + sort_in_bins = false; + } + } + if (std::is_floating_point::value) { + KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - + static_cast(result.min_val))); + } + + using CompType = BinOp1D; + BinSort bin_sort( + view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view); +} + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_cudathrust(const Cuda& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::cuda::par.on(space.cuda_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_rocthrust(const HIP& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::hip::par.on(space.hip_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_onedpl(const Kokkos::Experimental::SYCL& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "SYCL sort only supports contiguous rank-1 Views with LayoutLeft, " + "LayoutRight or LayoutStride" + "For the latter, this means the View must have stride(0) = 1, enforced " + "at runtime."); + + if (view.stride(0) != 1) { + Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); + } + + if (view.extent(0) <= 1) { + return; + } + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n, + std::forward(maybeComparator)...); +} +#endif + +template +void copy_to_host_run_stdsort_copy_back( + const ExecutionSpace& exec, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + namespace KE = ::Kokkos::Experimental; + + using ViewType = Kokkos::View; + using layout = typename ViewType::array_layout; + if constexpr (std::is_same_v) { + // for strided views we cannot just deep_copy from device to host, + // so we need to do a few more jumps + using view_value_type = typename ViewType::non_const_value_type; + using view_exespace = typename ViewType::execution_space; + using view_deep_copyable_t = Kokkos::View; + view_deep_copyable_t view_dc("view_dc", view.extent(0)); + KE::copy(exec, view, view_dc); + + // run sort on the mirror of view_dc + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view_dc, mv_h); + + // copy back to argument view + KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); + } else { + auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view, view_h); + } +} + +// -------------------------------------------------- +// +// specialize cases for sorting without comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_without_comparator( + const Cuda& exec, const Kokkos::View& view) { + sort_cudathrust(exec, view); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_device_view_without_comparator( + const HIP& exec, const Kokkos::View& view) { + sort_rocthrust(exec, view); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_without_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_without_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + sort_onedpl(exec, view); + } else { + copy_to_host_run_stdsort_copy_back(exec, view); + } +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_device_view_without_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view) { + sort_via_binsort(exec, view); +} + +// -------------------------------------------------- +// +// specialize cases for sorting with comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_with_comparator( + const Cuda& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_cudathrust(exec, view, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_device_view_with_comparator( + const HIP& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_rocthrust(exec, view, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_with_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_with_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + sort_onedpl(exec, view, comparator); + } else { + copy_to_host_run_stdsort_copy_back(exec, view, comparator); + } +} +#endif + +template +std::enable_if_t::value> +sort_device_view_with_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // This is a fallback case if a more specialized overload does not exist: + // for now, this fallback copies data to host, runs std::sort + // and then copies data back. Potentially, this can later be changed + // with a better solution like our own quicksort on device or similar. + + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; +// Note with HIP unified memory this code path is still the right thing to do +// if we end up here when RocThrust is not enabled. +// The create_mirror_view_and_copy will do the right thing (no copy). +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + static_assert(!SpaceAccessibility::accessible, + "Impl::sort_device_view_with_comparator: should not be called " + "on a view that is already accessible on the host"); +#endif + + copy_to_host_run_stdsort_copy_back(exec, view, comparator); +} + +} // namespace Impl +} // namespace Kokkos +#endif diff --git a/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp b/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 38dcd1a674..f254686dba 100644 --- a/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp +++ b/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -23,64 +23,85 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest) { +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, OutputIteratorType first_dest, - BinaryOp bin_op) { - return Impl::adjacent_difference_impl( +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, bin_op); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest) { +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOp bin_op) { - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, bin_op); +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl(label, ex, first_from, + last_from, first_dest, bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -96,13 +117,15 @@ auto adjacent_difference( using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -111,13 +134,15 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -134,13 +159,15 @@ auto adjacent_difference( Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op()); } -template +template ::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -149,9 +176,85 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), bin_op); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), bin_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType adjacent_difference( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + using value_type1 = typename InputIteratorType::value_type; + using value_type2 = typename OutputIteratorType::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, binary_op()); +} + +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +adjacent_difference(const TeamHandleType& teamHandle, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, bin_op); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + using view_type1 = ::Kokkos::View; + using view_type2 = ::Kokkos::View; + using value_type1 = typename view_type1::value_type; + using value_type2 = typename view_type2::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), binary_op()); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp bin_op) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), bin_op); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp index 43c2b66010..ac476ca5bf 100644 --- a/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp +++ b/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -23,71 +23,144 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set1 -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl(label, ex, first, last); + return Impl::adjacent_find_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v)); } // overload set2 -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last, pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl(label, ex, first, last, pred); + return Impl::adjacent_find_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v), + pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v), + pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set1 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::adjacent_find_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v)); +} + +// overload set2 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v), + pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/algorithms/src/std_algorithms/Kokkos_AllOf.hpp index 2ffec7e144..d6ed4c4a7e 100644 --- a/algorithms/src/std_algorithms/Kokkos_AllOf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, - last, predicate); + return Impl::all_of_exespace_impl("Kokkos::all_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl(label, ex, first, last, predicate); + return Impl::all_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::all_of_exespace_impl("Kokkos::all_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::all_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp index 019c466c6d..82356e6598 100644 --- a/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, - predicate); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl(label, ex, first, last, predicate); + return Impl::any_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::any_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/algorithms/src/std_algorithms/Kokkos_Copy.hpp index 028f3b66b2..c5406c72b0 100644 --- a/algorithms/src/std_algorithms/Kokkos_Copy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -23,44 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, - d_first); + return Impl::copy_exespace_impl("Kokkos::copy_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl(label, ex, first, last, d_first); + return Impl::copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl("Kokkos::copy_view_api_default", ex, - KE::cbegin(source), KE::cend(source), KE::begin(dest)); + return Impl::copy_exespace_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), + KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_exespace_impl(label, ex, KE::cbegin(source), + KE::cend(source), KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), - KE::begin(dest)); + return Impl::copy_team_impl(teamHandle, KE::cbegin(source), KE::cend(source), + KE::begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp index deff6baf9a..82071a9362 100644 --- a/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp +++ b/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -23,42 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl(label, ex, first, last, d_last); + return Impl::copy_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, - cbegin(source), cend(source), end(dest)); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_view_api_default", ex, cbegin(source), + cend(source), end(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_exespace_impl(label, ex, cbegin(source), + cend(source), end(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 copy_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), - end(dest)); + return Impl::copy_backward_team_impl(teamHandle, cbegin(source), cend(source), + end(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp index 3db2fc074f..599fde5737 100644 --- a/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -23,46 +23,87 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, - last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_iterator_api_default", ex, + first, last, d_first, std::move(pred)); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, first, last, d_first, + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest, Predicate pred) { + const ::Kokkos::View& dest, + Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest, Predicate pred) { + const ::Kokkos::View& dest, + Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_exespace_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_team_impl(teamHandle, first, last, d_first, + std::move(pred)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::copy_if_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/algorithms/src/std_algorithms/Kokkos_CopyN.hpp index a64f99b5c0..637d8d4cbc 100644 --- a/algorithms/src/std_algorithms/Kokkos_CopyN.hpp +++ b/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -23,45 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, - count, result); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_iterator_api_default", ex, + first, count, result); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl(label, ex, first, count, result); + return Impl::copy_n_exespace_impl(label, ex, first, count, result); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, - KE::cbegin(source), count, KE::begin(dest)); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_exespace_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_n(const TeamHandleType& teamHandle, + InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_team_impl(teamHandle, first, count, result); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, Size count, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, - KE::begin(dest)); + return Impl::copy_n_team_impl(teamHandle, KE::cbegin(source), count, + KE::begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Count.hpp b/algorithms/src/std_algorithms/Kokkos_Count.hpp index 3ac63467ec..f179e88bab 100644 --- a/algorithms/src/std_algorithms/Kokkos_Count.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, - value); + return Impl::count_exespace_impl("Kokkos::count_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl(label, ex, first, last, value); + return Impl::count_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), - KE::cend(v), value); + return Impl::count_exespace_impl("Kokkos::count_view_api_default", ex, + KE::cbegin(v), KE::cend(v), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); + return Impl::count_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + const T& value) { + return Impl::count_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/algorithms/src/std_algorithms/Kokkos_CountIf.hpp index b9731d378a..967cf75e7a 100644 --- a/algorithms/src/std_algorithms/Kokkos_CountIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -23,46 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, - last, std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_iterator_api_default", + ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count_if( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::count_if_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/algorithms/src/std_algorithms/Kokkos_Equal.hpp index 37c0d75ef5..593c42f87e 100644 --- a/algorithms/src/std_algorithms/Kokkos_Equal.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -23,145 +23,260 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::equal_impl(label, ex, first1, last1, first2); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, std::move(predicate)); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, - std::move(predicate)); -} - -template +// +// overload set accepting execution space +// +template && + Kokkos::is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, std::move(predicate)); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, + std::move(predicate)); +} + +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2) { + const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2) { + const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2, + const ::Kokkos::View& view2, BinaryPredicateType predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), - std::move(predicate)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2, + const ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + std::move(predicate)); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2, + std::move(predicate)); +} + +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2), std::move(predicate)); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2, std::move(predicate)); -} - -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2, - std::move(predicate)); + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2, + std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp index 4e05676c2c..ee3a105126 100644 --- a/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -23,105 +23,130 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest, init_value); + first_dest, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl(label, ex, first, last, - first_dest, init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value); + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); } // overload set 2 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, init_value, bop); + first_dest, std::move(init_value), bop); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, - init_value, bop); + return Impl::exclusive_scan_custom_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -129,18 +154,20 @@ auto exclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, bop); + std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -148,12 +175,92 @@ auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, bop); + KE::begin(view_dest), std::move(init_value), bop); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); +} + +// overload set 2 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), bop); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), bop); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/algorithms/src/std_algorithms/Kokkos_Fill.hpp index 1e300a4c20..6d805ba1be 100644 --- a/algorithms/src/std_algorithms/Kokkos_Fill.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -23,33 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); + Impl::fill_exespace_impl("Kokkos::fill_iterator_api_default", ex, first, last, + value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl(label, ex, first, last, value); + Impl::fill_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), - value); + Impl::fill_exespace_impl("Kokkos::fill_view_api_default", ex, begin(view), + end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_exespace_impl(label, ex, begin(view), end(view), value); +} - Impl::fill_impl(label, ex, begin(view), end(view), value); +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, IteratorType first, + IteratorType last, const T& value) { + Impl::fill_team_impl(th, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_team_impl(th, begin(view), end(view), value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/algorithms/src/std_algorithms/Kokkos_FillN.hpp index 02503dfd14..66b8cd66cc 100644 --- a/algorithms/src/std_algorithms/Kokkos_FillN.hpp +++ b/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -23,38 +23,72 @@ namespace Kokkos { namespace Experimental { -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, - value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_iterator_api_default", ex, + first, n, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl(label, ex, first, n, value); + return Impl::fill_n_exespace_impl(label, ex, first, n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), - n, value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_view_api_default", ex, + begin(view), n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl(label, ex, begin(view), n, value); + return Impl::fill_n_exespace_impl(label, ex, begin(view), n, value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType fill_n(const TeamHandleType& th, + IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_team_impl(th, first, n, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto fill_n(const TeamHandleType& th, + const ::Kokkos::View& view, + SizeType n, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::fill_n_team_impl(th, begin(view), n, value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Find.hpp b/algorithms/src/std_algorithms/Kokkos_Find.hpp index 65b68cf931..e5e2b0e2b0 100644 --- a/algorithms/src/std_algorithms/Kokkos_Find.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -23,36 +23,76 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, - value); + return Impl::find_exespace_impl("Kokkos::find_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl(label, ex, first, last, value); + return Impl::find_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), - KE::end(view), value); + return Impl::find_exespace_impl("Kokkos::find_view_api_default", ex, + KE::begin(view), KE::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); + return Impl::find_exespace_impl(label, ex, KE::begin(view), KE::end(view), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION InputIterator find(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + const T& value) { + return Impl::find_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_team_impl(teamHandle, KE::begin(view), KE::end(view), + value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp index f6a38855eb..a4ec735fd5 100644 --- a/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp +++ b/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -24,24 +24,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last); + return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -49,13 +59,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -63,31 +75,38 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -96,13 +115,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -111,8 +132,71 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp index 6b0e4993ee..341a70e2f2 100644 --- a/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -23,24 +23,36 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +60,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,33 +76,41 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last, pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, - pred); + return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -97,13 +119,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -112,8 +136,77 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, + s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/algorithms/src/std_algorithms/Kokkos_FindIf.hpp index 911316a668..283fab7617 100644 --- a/algorithms/src/std_algorithms/Kokkos_FindIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -23,42 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl("Kokkos::find_if_iterator_api_default", - ex, first, last, std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_iterator_api_default", ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl("Kokkos::find_if_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_view_api_default", ex, KE::begin(v), KE::end(v), + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl(teamHandle, KE::begin(v), + KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp index 18294d7b7d..5e17a6f539 100644 --- a/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp +++ b/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -23,45 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_iterator_api_default", ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if_not(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if_not( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl( + teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index d7b08e4842..05969be463 100644 --- a/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -23,42 +23,79 @@ namespace Kokkos { namespace Experimental { -template -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_impl(label, ex, first, last, std::move(functor)); +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> +void for_each(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, UnaryFunctorType functor) { + Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor)); +} + +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> +void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex, + first, last, std::move(functor)); } -template -UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, - last, std::move(functor)); +template < + class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> +void for_each(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } -template -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +template < + class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> +void for_each(const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); } -template -UnaryFunctorType for_each(const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +template , int> = 0> +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), std::move(functor)); + Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp index f1769da05b..e6fbcad891 100644 --- a/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -23,43 +23,87 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, first, n, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, - first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl( + "Kokkos::for_each_n_iterator_api_default", ex, first, n, + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, KE::begin(v), n, + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, - KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl("Kokkos::for_each_n_view_api_default", + ex, KE::begin(v), n, + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION IteratorType for_each_n(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + return Impl::for_each_n_team_impl(teamHandle, first, n, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto for_each_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_team_impl(teamHandle, KE::begin(v), n, + std::move(functor)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/algorithms/src/std_algorithms/Kokkos_Generate.hpp index 13e12783e0..a3295084ee 100644 --- a/algorithms/src/std_algorithms/Kokkos_Generate.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -23,38 +23,68 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, - std::move(g)); + Impl::generate_exespace_impl("Kokkos::generate_iterator_api_default", ex, + first, last, std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl(label, ex, first, last, std::move(g)); + Impl::generate_exespace_impl(label, ex, first, last, std::move(g)); } -template +template , int> = 0> void generate(const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), - end(view), std::move(g)); + Impl::generate_exespace_impl("Kokkos::generate_view_api_default", ex, + begin(view), end(view), std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); + Impl::generate_exespace_impl(label, ex, begin(view), end(view), std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void generate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + Impl::generate_team_impl(teamHandle, first, last, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION void generate( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::generate_team_impl(teamHandle, begin(view), end(view), std::move(g)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp index 4d17512228..e480062c23 100644 --- a/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp +++ b/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -23,40 +23,75 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, - count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl( + "Kokkos::generate_n_iterator_api_default", ex, first, count, + std::move(g)); } -template +template , int> = 0> IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl(label, ex, first, count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl(label, ex, first, count, std::move(g)); } -template +template , int> = 0> auto generate_n(const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, - begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl("Kokkos::generate_n_view_api_default", + ex, begin(view), count, std::move(g)); } -template +template , int> = 0> auto generate_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl(label, ex, begin(view), count, + std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType generate_n(const TeamHandleType& teamHandle, + IteratorType first, Size count, + Generator g) { + return Impl::generate_n_team_impl(teamHandle, first, count, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto generate_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::generate_n_team_impl(teamHandle, begin(view), count, + std::move(g)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp index bcd731b850..a0e540b5e7 100644 --- a/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp +++ b/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -23,33 +23,45 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, first_dest); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl(label, ex, first, last, - first_dest); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl(label, ex, first, last, + first_dest); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -57,13 +69,15 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl( + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -71,39 +85,45 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest)); + return Impl::inclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, - first_dest, binary_op); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -111,14 +131,16 @@ auto inclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -126,67 +148,192 @@ auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op, ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op, init_value); + first_dest, binary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op, - ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, first, last, first_dest, binary_op, init_value); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, init_value); + binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, init_value); + KE::begin(view_dest), binary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1 +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_team_impl(teamHandle, first, last, + first_dest); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); +} + +// overload set 2 (accepting custom binary op) +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op); +} + +// overload set 3 +template && :: + Kokkos::is_team_handle_v, + int> = 0> + +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, std::move(init_value)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp index 29d6be9e8b..42f20bc4ec 100644 --- a/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp +++ b/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -23,39 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl( + return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", - ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl( + "Kokkos::is_partitioned_view_api_default", ex, cbegin(v), cend(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, cbegin(v), cend(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType p) { + return Impl::is_partitioned_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::is_partitioned_team_impl(teamHandle, cbegin(v), cend(v), + std::move(p)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp index f036254a02..2c676c3ff3 100644 --- a/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp +++ b/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -23,55 +23,73 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl(label, ex, first, last); + return Impl::is_sorted_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -79,13 +97,15 @@ bool is_sorted(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -93,8 +113,56 @@ bool is_sorted(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view), std::move(comp)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::is_sorted_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_team_impl(teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), + std::move(comp)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp index 276b3bb884..96a17b6785 100644 --- a/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp +++ b/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -23,58 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl( + return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl(label, ex, first, last); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl( + return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last, + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -82,13 +102,15 @@ auto is_sorted_until(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -96,8 +118,57 @@ auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view), std::move(comp)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_until_team_impl(teamHandle, first, last, + std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view), std::move(comp)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp b/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp index a796a306dd..5bb2d1039d 100644 --- a/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp +++ b/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp @@ -19,7 +19,6 @@ #include #include "impl/Kokkos_Constraints.hpp" -#include "Kokkos_Swap.hpp" namespace Kokkos { namespace Experimental { @@ -33,7 +32,7 @@ struct StdIterSwapFunctor { KOKKOS_FUNCTION void operator()(int i) const { (void)i; - ::Kokkos::Experimental::swap(*m_a, *m_b); + ::Kokkos::kokkos_swap(*m_a, *m_b); } KOKKOS_FUNCTION @@ -58,6 +57,16 @@ void iter_swap(IteratorType1 a, IteratorType2 b) { Impl::iter_swap_impl(a, b); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +template +KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!") +KOKKOS_FUNCTION + void swap(T& a, T& b) noexcept(::Kokkos::kokkos_swap(std::declval(), + std::declval())) { + ::Kokkos::kokkos_swap(a, b); +} +#endif + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp index 0a77ef629f..e13479c370 100644 --- a/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp +++ b/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -23,101 +23,181 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2) { + const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2) { + const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2)); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2, comp); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2, comp); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2, comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2, ComparatorType comp) { + const ::Kokkos::View& view2, + ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, - ::Kokkos::View& view2, ComparatorType comp) { + const ::Kokkos::View& view2, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2, comp); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2), comp); + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp index 2c1374f700..d16bac5bfc 100644 --- a/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp +++ b/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/algorithms/src/std_algorithms/Kokkos_MinElement.hpp index 1d03b7c962..2a53fce3e2 100644 --- a/algorithms/src/std_algorithms/Kokkos_MinElement.hpp +++ b/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp index d481b499cc..c3a1f73ef6 100644 --- a/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp +++ b/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -23,82 +23,151 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl(label, ex, first, last); + return Impl::minmax_element_exespace_impl(label, ex, + first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl(label, ex, begin(v), - end(v)); + return Impl::minmax_element_exespace_impl( + label, ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::minmax_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_team_impl(teamHandle, + begin(v), end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp index 13c994ca90..090afe69e3 100644 --- a/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -30,46 +30,60 @@ namespace Experimental { // // makes API ambiguous (with the overload accepting views). -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_iterator_api_default", + ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2, - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); + return Impl::mismatch_exespace_impl(label, ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -77,13 +91,15 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2)); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -92,14 +108,16 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -107,12 +125,15 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2)); + return Impl::mismatch_exespace_impl(label, ex, KE::begin(view1), + KE::end(view1), KE::begin(view2), + KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -121,9 +142,65 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2), std::forward(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2, + std::forward(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Move.hpp b/algorithms/src/std_algorithms/Kokkos_Move.hpp index d49acd9f70..ac308ea184 100644 --- a/algorithms/src/std_algorithms/Kokkos_Move.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, - d_first); + return Impl::move_exespace_impl("Kokkos::move_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl(label, ex, first, last, d_first); + return Impl::move_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), - end(source), begin(dest)); + return Impl::move_exespace_impl("Kokkos::move_view_api_default", ex, + begin(source), end(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator move(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::move_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); + return Impl::move_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp index 60d50fa881..2789ab2179 100644 --- a/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp +++ b/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, - begin(source), end(source), end(dest)); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_view_api_default", ex, begin(source), end(source), + end(dest)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl(label, ex, first, last, d_last); + return Impl::move_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_exespace_impl(label, ex, begin(source), + end(source), end(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 move_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::move_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl(label, ex, begin(source), end(source), - end(dest)); + return Impl::move_backward_team_impl(teamHandle, begin(source), end(source), + end(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp index cf5de3b72b..f7baab3fc0 100644 --- a/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -23,41 +23,80 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, - last, predicate); + return Impl::none_of_exespace_impl("Kokkos::none_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl(label, ex, first, last, predicate); + return Impl::none_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::none_of_exespace_impl("Kokkos::none_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::none_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, Predicate predicate) { + return Impl::none_of_team_impl(teamHandle, first, last, predicate); +} + +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp index 38c0a35b62..a1feee8d6d 100644 --- a/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -23,57 +23,103 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl( + return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } -template +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const std::string& label, const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); + return Impl::partition_copy_exespace_impl(label, ex, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", - ex, cbegin(view_from), cend(view_from), - begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + "Kokkos::partition_copy_view_api_default", ex, cbegin(view_from), + cend(view_from), begin(view_dest_true), begin(view_dest_false), + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, cbegin(view_from), - cend(view_from), begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + label, ex, cbegin(view_from), cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair +partition_copy(const TeamHandleType& teamHandle, InputIteratorType from_first, + InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest_true, + const ::Kokkos::View& view_dest_false, + PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, cbegin(view_from), + cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp index 24798e377e..60cbeeda87 100644 --- a/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp +++ b/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -23,38 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl( + return Impl::partition_point_exespace_impl( "Kokkos::partitioned_point_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl(label, ex, first, last, std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, begin(v), end(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", - ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl( + "Kokkos::partition_point_view_api_default", ex, begin(v), end(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType partition_point(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + UnaryPredicate p) { + return Impl::partition_point_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_point( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_team_impl(teamHandle, begin(v), end(v), + std::move(p)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Reduce.hpp b/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index a31fa1497a..b84f00f8bb 100644 --- a/algorithms/src/std_algorithms/Kokkos_Reduce.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -23,28 +23,38 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // // overload set 1 // -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> auto reduce(const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -53,12 +63,14 @@ auto reduce(const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), value_type()); } -template +template ::value, + int> = 0> auto reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -67,37 +79,43 @@ auto reduce(const std::string& label, const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; - return Impl::reduce_default_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), value_type()); + return Impl::reduce_default_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl(label, ex, first, last, - init_reduction_value); + return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, + init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -107,13 +125,15 @@ ValueType reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -123,40 +143,46 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl(label, ex, first, last, - init_reduction_value, joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -166,13 +192,15 @@ ValueType reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -182,9 +210,114 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), init_reduction_value, - joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value, + joiner); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// +// overload set 1 +// +template < + typename TeamHandleType, typename IteratorType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION typename IteratorType::value_type reduce( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + return Impl::reduce_default_functors_team_impl( + teamHandle, first, last, typename IteratorType::value_type()); +} + +template < + typename TeamHandleType, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto reduce( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + using view_type = ::Kokkos::View; + using value_type = typename view_type::value_type; + + return Impl::reduce_default_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), value_type()); +} + +// +// overload set2: +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_default_functors_team_impl(teamHandle, first, last, + init_reduction_value); +} + +template < + typename TeamHandleType, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_default_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value); +} + +// +// overload set 3 +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value, + BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, + init_reduction_value, joiner); +} + +template < + typename TeamHandleType, typename DataType, typename... Properties, + typename ValueType, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_custom_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), + init_reduction_value, joiner); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/algorithms/src/std_algorithms/Kokkos_Remove.hpp index c8602d2f53..8a429d8d51 100644 --- a/algorithms/src/std_algorithms/Kokkos_Remove.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -23,38 +23,74 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, - last, value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl(label, ex, first, last, value); + return Impl::remove_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + const ValueType& value) { + return Impl::remove_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp index c2c06f6202..4b8fa9fe07 100644 --- a/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -23,26 +23,36 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - first_from, last_from, first_dest, value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, - value); + return Impl::remove_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -50,15 +60,17 @@ auto remove_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), - value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -66,12 +78,46 @@ auto remove_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl( + return Impl::remove_copy_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& value) { + return Impl::remove_copy_team_impl(teamHandle, first_from, last_from, + first_dest, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp index 6d642ed6f0..45e2b54bb6 100644 --- a/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, first_dest, pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, - pred); + return Impl::remove_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,15 +63,17 @@ auto remove_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -70,12 +81,46 @@ auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp index 4062e8d373..38461a37f2 100644 --- a/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -23,39 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - first, last, pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl(label, ex, first, last, pred); + return Impl::remove_if_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove_if(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + UnaryPredicate pred) { + return Impl::remove_if_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::remove_if_team_impl(teamHandle, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/algorithms/src/std_algorithms/Kokkos_Replace.hpp index 4d1490ded0..29afc4f0c2 100644 --- a/algorithms/src/std_algorithms/Kokkos_Replace.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -23,40 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, - old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_iterator_api", ex, first, last, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl(label, ex, first, last, old_value, new_value); + Impl::replace_exespace_impl(label, ex, first, last, old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), - KE::end(view), old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_view_api", ex, KE::begin(view), + KE::end(view), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), - old_value, new_value); + Impl::replace_exespace_impl(label, ex, KE::begin(view), KE::end(view), + old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void replace(const TeamHandleType& teamHandle, Iterator first, + Iterator last, const ValueType& old_value, + const ValueType& new_value) { + Impl::replace_team_impl(teamHandle, first, last, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_team_impl(teamHandle, KE::begin(view), KE::end(view), old_value, + new_value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp index e7f464e4bd..04d5767e89 100644 --- a/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, - first_from, last_from, first_dest, old_value, - new_value); + return Impl::replace_copy_exespace_impl("Kokkos::replace_copy_iterator_api", + ex, first_from, last_from, first_dest, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, - old_value, new_value); + return Impl::replace_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,13 +63,15 @@ auto replace_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), old_value, new_value); + return Impl::replace_copy_exespace_impl( + "Kokkos::replace_copy_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -68,9 +79,43 @@ auto replace_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - old_value, new_value); + return Impl::replace_copy_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_team_impl(teamHandle, first_from, last_from, + first_dest, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + old_value, new_value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp b/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp index 71ae8f8452..b87163f194 100644 --- a/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -23,33 +23,42 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, - first_from, last_from, first_dest, pred, - new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_iterator_api", ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl(label, ex, first_from, last_from, - first_dest, pred, new_value); + return Impl::replace_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -57,14 +66,16 @@ auto replace_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), pred, new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -72,9 +83,44 @@ auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - pred, new_value); + return Impl::replace_copy_if_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), pred, new_value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp index 7f06540e06..73af1f16f0 100644 --- a/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -23,43 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, - last, pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_iterator_api", ex, first, + last, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl(label, ex, first, last, pred, new_value); + Impl::replace_if_exespace_impl(label, ex, first, last, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, - KE::begin(view), KE::end(view), pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_view_api", ex, + KE::begin(view), KE::end(view), pred, + new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, - new_value); + Impl::replace_if_exespace_impl(label, ex, KE::begin(view), KE::end(view), + pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void replace_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate pred, const ValueType& new_value) { + Impl::replace_if_team_impl(teamHandle, first, last, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Predicate pred, + const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_if_team_impl(teamHandle, KE::begin(view), KE::end(view), pred, + new_value); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/algorithms/src/std_algorithms/Kokkos_Reverse.hpp index 9f2fc5f3cc..a0786d3a2e 100644 --- a/algorithms/src/std_algorithms/Kokkos_Reverse.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -23,34 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, - last); + return Impl::reverse_exespace_impl("Kokkos::reverse_iterator_api_default", ex, + first, last); } -template +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl(label, ex, first, last); + return Impl::reverse_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, - KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl("Kokkos::reverse_view_api_default", ex, + KE::begin(view), KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl(label, ex, KE::begin(view), KE::end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void reverse(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last) { + return Impl::reverse_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION void reverse( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_team_impl(teamHandle, KE::begin(view), KE::end(view)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp index 279bb22086..66f39c4eaa 100644 --- a/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", - ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_iterator_api_default", ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl(label, ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest)); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_view_api_default", ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator reverse_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::reverse_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto reverse_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest)); + return Impl::reverse_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/algorithms/src/std_algorithms/Kokkos_Rotate.hpp index 738e9bf137..aff04b47d6 100644 --- a/algorithms/src/std_algorithms/Kokkos_Rotate.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -23,36 +23,71 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, - n_first, last); + return Impl::rotate_exespace_impl("Kokkos::rotate_iterator_api_default", ex, + first, n_first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl(label, ex, first, n_first, last); + return Impl::rotate_exespace_impl(label, ex, first, n_first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), - begin(view) + n_location, end(view)); + return Impl::rotate_exespace_impl("Kokkos::rotate_view_api_default", ex, + begin(view), begin(view) + n_location, + end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, - end(view)); + return Impl::rotate_exespace_impl(label, ex, begin(view), + begin(view) + n_location, end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType rotate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType n_first, + IteratorType last) { + return Impl::rotate_team_impl(teamHandle, first, n_first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_team_impl(teamHandle, begin(view), + begin(view) + n_location, end(view)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp index f5d826c4bb..cce37fccfa 100644 --- a/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -23,23 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, - first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_iterator_api_default", ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl(label, ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -47,13 +58,15 @@ auto rotate_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, - cbegin(source), cbegin(source) + n_location, - cend(source), begin(dest)); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_view_api_default", ex, cbegin(source), + cbegin(source) + n_location, cend(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -61,9 +74,41 @@ auto rotate_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl(label, ex, cbegin(source), - cbegin(source) + n_location, cend(source), - begin(dest)); + return Impl::rotate_copy_exespace_impl(label, ex, cbegin(source), + cbegin(source) + n_location, + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator rotate_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator n_first, + InputIterator last, + OutputIterator d_first) { + return Impl::rotate_copy_team_impl(teamHandle, first, n_first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + std::size_t n_location, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_team_impl(teamHandle, cbegin(source), + cbegin(source) + n_location, cend(source), + begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Search.hpp b/algorithms/src/std_algorithms/Kokkos_Search.hpp index b1154b297e..43258a484e 100644 --- a/algorithms/src/std_algorithms/Kokkos_Search.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -23,24 +23,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl(label, ex, first, last, s_first, s_last); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +58,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,31 +74,38 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -95,13 +114,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -110,8 +131,70 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/algorithms/src/std_algorithms/Kokkos_SearchN.hpp index a649c8f205..0f8aa5f1c1 100644 --- a/algorithms/src/std_algorithms/Kokkos_SearchN.hpp +++ b/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -23,68 +23,86 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl(label, ex, first, last, count, value); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, + value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value); } // overload set 2: binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value, pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value, pred); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl(label, ex, first, last, count, value, pred); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value, + pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -92,13 +110,15 @@ auto search_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value, - pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, + value, pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -106,8 +126,65 @@ auto search_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value, pred); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value, pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value); +} + +template < + class TeamHandleType, class DataType, class... Properties, class SizeType, + class ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value); +} + +// overload set 2: binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value, pred); +} + +template < + class TeamHandleType, class DataType, class... Properties, class SizeType, + class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value, const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value, pred); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp index 4b91a17ab8..b3e04a3b97 100644 --- a/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, - first, last, n); + return Impl::shift_left_exespace_impl( + "Kokkos::shift_left_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl(label, ex, first, last, n); + return Impl::shift_left_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_left_exespace_impl("Kokkos::shift_left_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl(label, ex, begin(view), end(view), n); + return Impl::shift_left_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_left(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_left_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_left( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp index 2ea50fd74e..0f7ed53948 100644 --- a/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp +++ b/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, - first, last, n); + return Impl::shift_right_exespace_impl( + "Kokkos::shift_right_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl(label, ex, first, last, n); + return Impl::shift_right_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_right_exespace_impl("Kokkos::shift_right_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl(label, ex, begin(view), end(view), n); + return Impl::shift_right_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_right(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_right_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_right( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp index 5fbf045318..d66763d304 100644 --- a/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp +++ b/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -23,44 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, - first1, last1, first2); + return Impl::swap_ranges_exespace_impl( + "Kokkos::swap_ranges_iterator_api_default", ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, - begin(source), end(source), begin(dest)); + return Impl::swap_ranges_exespace_impl("Kokkos::swap_ranges_view_api_default", + ex, begin(source), end(source), + begin(dest)); } -template +template , int> = 0> IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl(label, ex, first1, last1, first2); + return Impl::swap_ranges_exespace_impl(label, ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest) { + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 swap_ranges(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2) { + return Impl::swap_ranges_team_impl(teamHandle, first1, last1, first2); +} + +template , int> = 0> +KOKKOS_FUNCTION auto swap_ranges( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl(label, ex, begin(source), end(source), - begin(dest)); + return Impl::swap_ranges_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/algorithms/src/std_algorithms/Kokkos_Transform.hpp index 27dee30426..84cbed524d 100644 --- a/algorithms/src/std_algorithms/Kokkos_Transform.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -23,113 +23,200 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, - OutputIterator d_first, UnaryOperation unary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, d_first, std::move(unary_op)); +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator first1, + InputIterator last1, OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, d_first, + std::move(unary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator first1, InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - return Impl::transform_impl(label, ex, first1, last1, d_first, - std::move(unary_op)); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator first1, InputIterator last1, + OutputIterator d_first, UnaryOperation unary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, d_first, + std::move(unary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest, + const ::Kokkos::View& dest, UnaryOperation unary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source), end(source), begin(dest), - std::move(unary_op)); + return Impl::transform_exespace_impl("Kokkos::transform_view_api_default", ex, + begin(source), end(source), begin(dest), + std::move(unary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, - ::Kokkos::View& dest, + const ::Kokkos::View& dest, UnaryOperation unary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source), end(source), - begin(dest), std::move(unary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source), end(source), + begin(dest), std::move(unary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator d_first, - BinaryOperation binary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, BinaryOperation binary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, first2, d_first, + std::move(binary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, BinaryOperation binary_op) { - return Impl::transform_impl(label, ex, first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, first2, + d_first, std::move(binary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, - ::Kokkos::View& dest, + const ::Kokkos::View& dest, BinaryOperation binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source1), end(source1), begin(source2), - begin(dest), std::move(binary_op)); + return Impl::transform_exespace_impl( + "Kokkos::transform_view_api_default", ex, begin(source1), end(source1), + begin(source2), begin(dest), std::move(binary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, - ::Kokkos::View& dest, + const ::Kokkos::View& dest, BinaryOperation binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source1), end(source1), - begin(source2), begin(dest), - std::move(binary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator first1, + InputIterator last1, + OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, d_first, + std::move(unary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, + UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source), end(source), + begin(dest), std::move(unary_op)); +} + +template < + typename TeamHandleType, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, first2, d_first, + std::move(binary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source1, + const ::Kokkos::View& source2, + const ::Kokkos::View& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp index 9d85aee06f..37fc0f860e 100644 --- a/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp +++ b/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -23,44 +23,52 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType binary_op, - UnaryOpType unary_op) { +// +// overload set accepting execution space +// +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, init_value, binary_op, unary_op); + first, last, first_dest, std::move(init_value), binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType binary_op, UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, - init_value, binary_op, unary_op); + return Impl::transform_exclusive_scan_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), binary_op, + unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -69,18 +77,20 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, binary_op, unary_op); + std::move(init_value), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -89,12 +99,56 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, binary_op, unary_op); + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_exclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::transform_exclusive_scan_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), binary_op, + unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp index 7489af7e37..5f694dbfd9 100644 --- a/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp +++ b/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -23,40 +23,53 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 (no init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op); + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -66,15 +79,17 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -84,46 +99,59 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op, init_value); + first, last, first_dest, binary_op, unary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op, ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op, init_value); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -132,16 +160,21 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op, init_value); + binary_op, unary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -150,10 +183,97 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op, init_value); + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1 (no init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op); +} + +// overload set 2 (init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp b/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index b5ec9066d2..101f5113f6 100644 --- a/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp +++ b/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -23,34 +23,44 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- -template +template ::value, + int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value)); } // overload1 accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -60,14 +70,16 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -77,7 +89,7 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } @@ -95,8 +107,11 @@ ValueType transform_reduce( // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -105,14 +120,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -121,15 +139,17 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -143,16 +163,18 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -166,7 +188,7 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); @@ -176,43 +198,50 @@ ValueType transform_reduce( // overload set3: // // accepting iterators -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const ExecutionSpace& ex, IteratorType first1, - IteratorType last1, ValueType init_reduction_value, - BinaryJoinerType joiner, UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const ExecutionSpace& ex, IteratorType first1, + IteratorType last1, ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const std::string& label, const ExecutionSpace& ex, - IteratorType first1, IteratorType last1, - ValueType init_reduction_value, BinaryJoinerType joiner, - UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, + IteratorType first1, IteratorType last1, + ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -224,14 +253,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -243,12 +275,154 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// ---------------------------- +// overload set1: +// no custom functors passed, so equivalent to +// transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); +// ---------------------------- +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + ValueType init_reduction_value) { + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value)); +} + +// overload1 accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value)); +} + +// +// overload set2: +// accepts a custom transform and joiner functor +// + +// Note the std refers to the arg BinaryReductionOp +// but in the Kokkos naming convention, it corresponds +// to a "joiner" that knows how to join two values +// NOTE: "joiner/transformer" need to be commutative. + +// https://en.cppreference.com/w/cpp/algorithm/transform_reduce + +// api accepting iterators +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, ValueType init_reduction_value, + BinaryJoinerType joiner, BinaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value, BinaryJoinerType joiner, + BinaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// +// overload set3: +// +// accepting iterators +template ::value && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType first1, + IteratorType last1, + ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryJoinerType joiner, + UnaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), + std::move(init_reduction_value), std::move(joiner), + std::move(transformer)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/algorithms/src/std_algorithms/Kokkos_Unique.hpp index b47ecffb20..2d56315f61 100644 --- a/algorithms/src/std_algorithms/Kokkos_Unique.hpp +++ b/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -23,71 +23,132 @@ namespace Kokkos { namespace Experimental { -// note: the enable_if below is to avoid "call to ... is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template -std::enable_if_t::value, IteratorType> unique( - const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last); +// +// overload set1: default predicate, accepting execution space +// +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last); } -template -std::enable_if_t::value, IteratorType> unique( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::unique_impl(label, ex, first, last); +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::unique_exespace_impl(label, ex, first, last); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, - begin(view), end(view)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// +template ::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last, pred); + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last, pred); } -template +template ::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl(label, ex, first, last, pred); + return Impl::unique_exespace_impl(label, ex, first, last, pred); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), - end(view), std::move(pred)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view), std::move(pred)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view), + std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::unique_team_impl(teamHandle, first, last); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value, int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, first, last, std::move(pred)); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view), + std::move(pred)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp index bd2451c220..4a32d7e095 100644 --- a/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp +++ b/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -23,67 +23,90 @@ namespace Kokkos { namespace Experimental { -// overload set1 -template -std::enable_if_t::value, OutputIterator> -unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first); +// +// overload set1: default predicate, accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first); } -template -std::enable_if_t::value, OutputIterator> -unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::unique_copy_impl(label, ex, first, last, d_first); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy( - "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), - begin(dest)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), - cend(source), begin(dest)); + return Impl::unique_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// + +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first, pred); + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first, + pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -91,13 +114,15 @@ auto unique_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -105,8 +130,70 @@ auto unique_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::unique_copy_exespace_impl( + label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + std::enable_if_t && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first, + BinaryPredicate pred) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index 8a474508d7..9f7fcf94fe 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -63,14 +63,15 @@ struct StdAdjacentDiffFunctor { m_op(std::move(op)) {} }; +// +// exespace impl +// template -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { +OutputIteratorType adjacent_difference_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -81,25 +82,60 @@ OutputIteratorType adjacent_difference_impl(const std::string& label, return first_dest; } - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View; - using functor_t = - StdAdjacentDiffFunctor; +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); + ::Kokkos::parallel_for( + label, RangePolicy(ex, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); ex.fence("Kokkos::adjacent_difference: fence after operation"); // return return first_dest + num_elements; } +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOp bin_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + if (first_from == last_from) { + return first_dest; + } + +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index dd785e603b..f30b7be06a 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -27,9 +27,9 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdAdjacentFindFunctor { + using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; @@ -37,13 +37,13 @@ struct StdAdjacentFindFunctor { PredicateType m_p; KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { + void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); // FIXME_NVHPC using a ternary operator causes problems - red_value_type value = {::Kokkos::reduction_identity::min()}; + red_value_type value = {::Kokkos::reduction_identity::min()}; if (are_equal) { value.min_loc_true = i; } @@ -59,10 +59,14 @@ struct StdAdjacentFindFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -76,8 +80,6 @@ IteratorType adjacent_find_impl(const std::string& label, using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; - using func_t = StdAdjacentFindFunctor; reduction_value_type red_result; reducer_type reducer(red_result); @@ -86,7 +88,8 @@ IteratorType adjacent_find_impl(const std::string& label, // each index i in the reduction checks i and (i+1). ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements - 1), - func_t(first, reducer, pred), reducer); + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), reducer); // fence not needed because reducing into scalar if (red_result.min_loc_true == @@ -98,12 +101,62 @@ IteratorType adjacent_find_impl(const std::string& label, } template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using default_pred_t = StdAlgoEqualBinaryPredicate; + return adjacent_find_exespace_impl(label, ex, first, last, default_pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +adjacent_find_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + if (num_elements <= 1) { + return last; + } + + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + reduction_value_type red_result; + reducer_type reducer(red_result); + + // note that we use below num_elements-1 because + // each index i in the reduction checks i and (i+1). + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements - 1), + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + return last; + } else { + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType adjacent_find_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; - return adjacent_find_impl(label, ex, first, last, default_pred_t()); + return adjacent_find_team_impl(teamHandle, first, last, default_pred_t()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp index ad562070a0..bdc050f9c1 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -23,23 +23,58 @@ namespace Kokkos { namespace Experimental { namespace Impl { +// +// exespace impl +// template -bool all_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == - last); +bool all_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); } template -bool any_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) != last); +bool any_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) != last); } template -bool none_of_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == last); +bool none_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool all_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); +} + +template +KOKKOS_FUNCTION bool any_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) != + last); +} + +template +KOKKOS_FUNCTION bool none_of_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 0376100410..54bb13e25b 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -24,18 +24,21 @@ namespace Kokkos { namespace Experimental { namespace Impl { +template +class RandomAccessIterator; + template struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< - T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && - (std::is_same::value || - std::is_same::value || - std::is_same::value)> > + T, std::enable_if_t<::Kokkos::is_view::value && T::rank() == 1 && + (std::is_same::value || + std::is_same::value || + std::is_same::value)>> : std::true_type {}; template @@ -55,6 +58,21 @@ using iterator_category_t = typename T::iterator_category; template using is_iterator = Kokkos::is_detected; +template +inline constexpr bool is_iterator_v = is_iterator::value; + +template +struct is_kokkos_iterator : std::false_type {}; + +template +struct is_kokkos_iterator> { + static constexpr bool value = + is_admissible_to_kokkos_std_algorithms::value; +}; + +template +inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator::value; + // // are_iterators // @@ -63,15 +81,18 @@ struct are_iterators; template struct are_iterators { - static constexpr bool value = is_iterator::value; + static constexpr bool value = is_iterator_v; }; template struct are_iterators { static constexpr bool value = - are_iterators::value && are_iterators::value; + are_iterators::value && (are_iterators::value && ... && true); }; +template +inline constexpr bool are_iterators_v = are_iterators::value; + // // are_random_access_iterators // @@ -81,17 +102,21 @@ struct are_random_access_iterators; template struct are_random_access_iterators { static constexpr bool value = - is_iterator::value && - std::is_base_of::value; + is_iterator_v && std::is_base_of::value; }; template struct are_random_access_iterators { - static constexpr bool value = are_random_access_iterators::value && - are_random_access_iterators::value; + static constexpr bool value = + are_random_access_iterators::value && + (are_random_access_iterators::value && ... && true); }; +template +inline constexpr bool are_random_access_iterators_v = + are_random_access_iterators::value; + // // iterators_are_accessible_from // @@ -113,16 +138,18 @@ struct iterators_are_accessible_from { iterators_are_accessible_from::value; }; -template +template KOKKOS_INLINE_FUNCTION constexpr void -static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */, - IteratorTypes... /* iterators */) { +static_assert_random_access_and_accessible( + const ExecutionSpaceOrTeamHandleType& /* ex_or_th*/, + IteratorTypes... /* iterators */) { static_assert( are_random_access_iterators::value, "Currently, Kokkos standard algorithms require random access iterators."); - static_assert( - iterators_are_accessible_from::value, - "Incompatible view/iterator and execution space"); + static_assert(iterators_are_accessible_from< + typename ExecutionSpaceOrTeamHandleType::execution_space, + IteratorTypes...>::value, + "Incompatible view/iterator and execution space"); } // @@ -182,10 +209,10 @@ struct not_openmptarget { #endif }; -template +template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( - const ExecutionSpace&) { - static_assert(not_openmptarget::value, + const ExecutionSpaceOrTeamHandleType& /*ex_or_th*/) { + static_assert(not_openmptarget::value, "Currently, Kokkos standard algorithms do not support custom " "comparators in OpenMPTarget"); } @@ -194,7 +221,8 @@ KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( // valid range // template -void expect_valid_range(IteratorType first, IteratorType last) { +KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, + IteratorType last) { // this is a no-op for release KOKKOS_EXPECTS(last >= first); // avoid compiler complaining when KOKKOS_EXPECTS is no-op @@ -202,6 +230,38 @@ void expect_valid_range(IteratorType first, IteratorType last) { (void)last; } +// +// Check if kokkos iterators are overlapping +// +template +KOKKOS_INLINE_FUNCTION void expect_no_overlap( + [[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last, + [[maybe_unused]] IteratorType2 s_first) { + if constexpr (is_kokkos_iterator_v && + is_kokkos_iterator_v) { + auto const view1 = first.view(); + auto const view2 = s_first.view(); + + std::size_t stride1 = view1.stride(0); + std::size_t stride2 = view2.stride(0); + ptrdiff_t first_diff = view1.data() - view2.data(); + + // FIXME If strides are not identical, checks may not be made + // with the cost of O(1) + // Currently, checks are made only if strides are identical + // If first_diff == 0, there is already an overlap + if (stride1 == stride2 || first_diff == 0) { + [[maybe_unused]] bool is_no_overlap = (first_diff % stride1); + auto* first_pointer1 = view1.data(); + auto* first_pointer2 = view2.data(); + [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); + [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); + KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || + last_pointer1 <= first_pointer2 || is_no_overlap); + } + } +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp index b3adbc5e2d..0f68c9e978 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -27,16 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyBackwardFunctor { - static_assert(std::is_signed::value, - "Kokkos: StdCopyBackwardFunctor requires signed index type"); + // we can use difference type from IteratorType1 since + // the calling functions below already static assert that + // the iterators have matching difference type + using index_type = typename IteratorType1::difference_type; IteratorType1 m_last; IteratorType2 m_dest_last; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } + void operator()(index_type i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } KOKKOS_FUNCTION StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) @@ -44,30 +46,51 @@ struct StdCopyBackwardFunctor { }; template -IteratorType2 copy_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { +IteratorType2 copy_backward_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_last); Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdCopyBackwardFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(last, d_last)); + // use CTAD + StdCopyBackwardFunctor(last, d_last)); ex.fence("Kokkos::copy_backward: fence after operation"); // return return d_last - num_elements; } +// +// team-level impl +// +template +KOKKOS_FUNCTION IteratorType2 +copy_backward_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCopyBackwardFunctor(last, d_last)); + teamHandle.team_barrier(); + + // return + return d_last - num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp index 1b120c46d0..86e99ecbd0 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -27,13 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyFunctor { + // we can use difference type from InputIterator since + // the calling functions below already static assert that + // the iterators have matching difference type + using index_type = typename InputIterator::difference_type; + InputIterator m_first; OutputIterator m_dest_first; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } + void operator()(index_type i) const { m_dest_first[i] = m_first[i]; } KOKKOS_FUNCTION StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) @@ -41,23 +46,20 @@ struct StdCopyFunctor { }; template -OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { +OutputIterator copy_exespace_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdCopyFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, d_first)); + // use CTAD + StdCopyFunctor(first, d_first)); ex.fence("Kokkos::copy: fence after operation"); // return @@ -66,16 +68,61 @@ OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, template -OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, Size count, - OutputIterator first_dest) { +OutputIterator copy_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, Size count, + OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { - return copy_impl(label, ex, first_from, first_from + count, first_dest); + return copy_exespace_impl(label, ex, first_from, first_from + count, + first_dest); + } else { + return first_dest; + } +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION OutputIterator copy_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCopyFunctor(first, d_first)); + teamHandle.team_barrier(); + + // return + return d_first + num_elements; +} + +template +KOKKOS_FUNCTION OutputIterator +copy_n_team_impl(const TeamHandleType& teamHandle, InputIterator first_from, + Size count, OutputIterator first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + + if (count > 0) { + return copy_team_impl(teamHandle, first_from, first_from + count, + first_dest); } else { return first_dest; } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c0c4f7e9b..ad7b8bb8ca 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -20,6 +20,7 @@ #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_MustUseKokkosSingleInTeam.hpp" #include #include @@ -27,8 +28,10 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyIfFunctor { + using index_type = typename FirstFrom::difference_type; + FirstFrom m_first_from; FirstDest m_first_dest; PredType m_pred; @@ -40,7 +43,7 @@ struct StdCopyIfFunctor { m_pred(std::move(pred)) {} KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, + void operator()(const index_type i, index_type& update, const bool final_pass) const { const auto& myval = m_first_from[i]; if (final_pass) { @@ -57,9 +60,11 @@ struct StdCopyIfFunctor { template -OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, PredicateType pred) { +OutputIterator copy_if_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, + PredicateType pred) { /* To explain the impl, suppose that our data is: @@ -90,23 +95,68 @@ OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, if (first == last) { return d_first; } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdCopyIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); - index_type count = 0; + + typename InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(first, d_first, pred), count); + // use CTAD + StdCopyIfFunctor(first, d_first, pred), count); // fence not needed because of the scan accumulating into count return d_first + count; } } +template +KOKKOS_FUNCTION OutputIterator copy_if_team_impl( + const TeamHandleType& teamHandle, InputIterator first, InputIterator last, + OutputIterator d_first, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return d_first; + } + + const std::size_t num_elements = Kokkos::Experimental::distance(first, last); + if constexpr (stdalgo_must_use_kokkos_single_for_team_scan_v< + typename TeamHandleType::execution_space>) { + std::size_t count = 0; + Kokkos::single( + Kokkos::PerTeam(teamHandle), + [=](std::size_t& lcount) { + lcount = 0; + for (std::size_t i = 0; i < num_elements; ++i) { + const auto& myval = first[i]; + if (pred(myval)) { + d_first[lcount++] = myval; + } + } + }, + count); + // no barrier needed since single above broadcasts to all members + return d_first + count; + + } else { + typename InputIterator::difference_type count = 0; + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + StdCopyIfFunctor(first, d_first, pred), count); + // no barrier needed because of the scan accumulating into count + return d_first + count; + } + +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp index 18b8c46359..9b6b403aa4 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -46,37 +46,65 @@ struct StdCountIfFunctor { }; template -typename IteratorType::difference_type count_if_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { +typename IteratorType::difference_type count_if_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdCountIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), - func_t(first, predicate), count); + // use CTAD + StdCountIfFunctor(first, predicate), count); ex.fence("Kokkos::count_if: fence after operation"); return count; } template -auto count_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - return count_if_impl( +auto count_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const T& value) { + return count_if_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team-level impl +// +template +KOKKOS_FUNCTION typename IteratorType::difference_type count_if_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + typename IteratorType::difference_type count = 0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCountIfFunctor(first, predicate), count); + teamHandle.team_barrier(); + + return count; +} + +template +KOKKOS_FUNCTION auto count_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + return count_if_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp index e045080d4a..62b7d226f6 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -27,15 +27,16 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdEqualFunctor { + using index_type = typename IteratorType1::difference_type; + IteratorType1 m_first1; IteratorType2 m_first2; BinaryPredicateType m_predicate; KOKKOS_FUNCTION - void operator()(IndexType i, std::size_t& lsum) const { + void operator()(index_type i, std::size_t& lsum) const { if (!m_predicate(m_first1[i], m_first2[i])) { lsum = 1; } @@ -49,67 +50,130 @@ struct StdEqualFunctor { m_predicate(std::move(_predicate)) {} }; +// +// exespace impl +// template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = StdEqualFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy(ex, 0, num_elements), - func_t(first1, first2, predicate), different); + ::Kokkos::parallel_reduce( + label, RangePolicy(ex, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), different); ex.fence("Kokkos::equal: fence after operation"); return !different; } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, pred_t()); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } - return equal_impl(label, ex, first1, last1, first2, predicate); + return equal_exespace_impl(label, ex, first1, last1, first2, predicate); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_exespace_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + std::size_t different = 0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), + different); + teamHandle.team_barrier(); + + return !different; +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, pred_t()); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + const auto d1 = ::Kokkos::Experimental::distance(first1, last1); + const auto d2 = ::Kokkos::Experimental::distance(first2, last2); + if (d1 != d2) { + return false; + } + + return equal_team_impl(teamHandle, first1, last1, first2, predicate); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); + return equal_team_impl(teamHandle, first1, last1, first2, last2, pred_t()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 71f13e490a..6da992b4bb 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -22,6 +22,7 @@ #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" #include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include "Kokkos_FunctorsForExclusiveScan.hpp" #include #include #include @@ -30,127 +31,15 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template -struct ExclusiveScanDefaultFunctorForKnownNeutralElement { - using execution_space = ExeSpace; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, - FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, ValueType& update, - const bool final_pass) const { - if (final_pass) m_first_dest[i] = update + m_init_value; - update += m_first_from[i]; - } -}; - -template -struct ExclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = update.val + m_init_value; - } - } - - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(value_type& update, const value_type& input) const { - if (input.is_initial) return; - - if (update.is_initial) { - update.val = input.val; - update.is_initial = false; - } else { - update.val = update.val + input.val; - } - } -}; - +// +// exespace impl +// template -OutputIteratorType exclusive_scan_custom_op_impl( + class OutputIteratorType, class ValueType> +OutputIteratorType exclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = - TransformExclusiveScanFunctor; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, unary_op_type())); - ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -template -using ex_scan_has_reduction_identity_sum_t = - decltype(Kokkos::reduction_identity::sum()); - -template -OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - ValueType init_value) { + OutputIteratorType first_dest, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -184,17 +73,122 @@ OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, ExclusiveScanDefaultFunctorForKnownNeutralElement< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType>, - ExclusiveScanDefaultFunctor>; + ExclusiveScanDefaultFunctorWithValueWrapper>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy(ex, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + + ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + + return first_dest + num_elements; +} + +template +OutputIteratorType exclusive_scan_custom_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TransformExclusiveScanFunctorWithValueWrapper< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest)); + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = ExclusiveScanDefaultFunctorForKnownNeutralElement< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType>; + + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + teamHandle.team_barrier(); + return first_dest + num_elements; +} + +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using index_type = typename InputIteratorType::difference_type; + using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + teamHandle.team_barrier(); return first_dest + num_elements; } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index 316d865f31..972e57f2cc 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ -41,9 +41,12 @@ struct StdFillFunctor { : m_first(std::move(_first)), m_value(std::move(_value)) {} }; +// +// exespace impl +// template -void fill_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { +void fill_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -52,13 +55,14 @@ void fill_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - StdFillFunctor(first, value)); + StdFillFunctor(first, value)); ex.fence("Kokkos::fill: fence after operation"); } template -IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { +IteratorType fill_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -67,7 +71,40 @@ IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - fill_impl(label, ex, first, last, value); + fill_exespace_impl(label, ex, first, last, value); + return last; +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION void fill_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdFillFunctor(first, value)); + + teamHandle.team_barrier(); +} + +template +KOKKOS_FUNCTION IteratorType fill_n_team_impl(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + const T& value) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + if (n <= 0) { + return first; + } + + fill_team_impl(teamHandle, first, last, value); return last; } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp index 3ec64fa43d..1f1ec5e54f 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -80,12 +80,17 @@ struct StdFindEndFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -97,7 +102,6 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op if (s_first == s_last) { return last; @@ -109,7 +113,8 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, // special case where the two ranges have equal size if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); + const auto equal_result = + equal_exespace_impl(label, ex, first, last, s_first, pred); return (equal_result) ? first : last; } else { using index_type = typename IteratorType1::difference_type; @@ -148,14 +153,97 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, } template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_end_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_end_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + // the target sequence should not be larger than the range [first, last) + namespace KE = ::Kokkos::Experimental; + const auto num_elements = KE::distance(first, last); + const auto s_count = KE::distance(s_first, s_last); + KOKKOS_EXPECTS(num_elements >= s_count); + + if (s_first == s_last) { + return last; + } + + if (first == last) { + return last; + } + + // special case where the two ranges have equal size + if (num_elements == s_count) { + const auto equal_result = + equal_team_impl(teamHandle, first, last, s_first, pred); + return (equal_result) ? first : last; + } else { + using index_type = typename IteratorType1::difference_type; + using reducer_type = LastLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindEndFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + + // decide the size of the range policy of the par_red: + // note that the last feasible index to start looking is the index + // whose distance from the "last" is equal to the sequence count. + // the +1 is because we need to include that location too. + const auto range_size = num_elements - s_count + 1; + + // run par reduce + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, range_size), + func_t(first, last, s_first, s_last, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.max_loc_true == + ::Kokkos::reduction_identity::max()) { + // if here, a subrange has not been found + return last; + } else { + // a location has been found + return first + red_result.max_loc_true; + } + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_end_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_end_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_end_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index 5f22d2ad13..145e235b9d 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -71,13 +71,15 @@ struct StdFindFirstOfFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -116,15 +118,71 @@ IteratorType1 find_first_of_impl(const std::string& label, } template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_first_of_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + if ((s_first == s_last) || (first == last)) { + return last; + } + + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindFirstOfFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, s_first, s_last, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // if here, nothing found + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_first_of_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index 9c0b0c0ccd..8fffb59094 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -61,11 +61,15 @@ struct StdFindIfOrNotFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType find_if_or_not_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { +IteratorType find_if_or_not_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( ex, first); // only need one It per type @@ -104,14 +108,68 @@ IteratorType find_if_or_not_impl(const std::string& label, } template -InputIterator find_impl(const std::string& label, ExecutionSpace ex, - InputIterator first, InputIterator last, - const T& value) { - return find_if_or_not_impl( +InputIterator find_exespace_impl(const std::string& label, ExecutionSpace ex, + InputIterator first, InputIterator last, + const T& value) { + return find_if_or_not_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +find_if_or_not_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible( + teamHandle, first); // only need one It per type + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindIfOrNotFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // here, it means a valid loc has not been found, + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION InputIterator find_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + const T& value) { + return find_if_or_not_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index f9a6ff2e99..99cc4a1cf3 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -41,29 +41,28 @@ struct StdForEachFunctor { : m_first(std::move(_first)), m_functor(std::move(_functor)) {} }; -template -UnaryFunctorType for_each_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +template +void for_each_exespace_impl(const std::string& label, const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks - Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( - label, RangePolicy(ex, 0, num_elements), + label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); - ex.fence("Kokkos::for_each: fence after operation"); - - return functor; + handle.fence("Kokkos::for_each: fence after operation"); } template -IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { +IteratorType for_each_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first, last); Impl::expect_valid_range(first, last); @@ -72,8 +71,45 @@ IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - for_each_impl(label, ex, first, last, std::move(functor)); - // no neeed to fence since for_each_impl fences already + for_each_exespace_impl(label, ex, first, last, std::move(functor)); + // no need to fence since for_each_exespace_impl fences already + + return last; +} + +// +// team impl +// +template +KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdForEachFunctor(first, functor)); + teamHandle.team_barrier(); +} + +template +KOKKOS_FUNCTION IteratorType +for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + SizeType n, UnaryFunctorType functor) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first, last); + Impl::expect_valid_range(first, last); + + if (n == 0) { + return first; + } + + for_each_team_impl(teamHandle, first, last, std::move(functor)); + // no need to fence since for_each_team_impl fences already return last; } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp new file mode 100644 index 0000000000..5a7fe16984 --- /dev/null +++ b/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp @@ -0,0 +1,221 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP + +#include +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +using ex_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity::sum()); + +template +struct ExclusiveScanDefaultFunctorForKnownNeutralElement { + using execution_space = ExeSpace; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + const auto tmp = m_first_from[i]; + if (final_pass) m_first_dest[i] = update + m_init_value; + update += tmp; + } +}; + +template +struct ExclusiveScanDefaultFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto tmp = value_type{m_first_from[i], false}; + if (final_pass) { + if (i == 0) { + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = update.val + m_init_value; + } + } + + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + update.is_initial = false; + } else { + update.val = update.val + input.val; + } + } +}; + +template +struct TransformExclusiveScanFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update.val, m_init_value); + } + } + + this->join(update, tmp); + } + + KOKKOS_FUNCTION void init(value_type& value) const { + value.val = {}; + value.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template +struct TransformExclusiveScanFunctorWithoutValueWrapper { + using execution_space = ExeSpace; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithoutValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + const auto tmp = ValueType{m_unary_op(m_first_from[i])}; + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update, m_init_value); + } + } + + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(ValueType& update) const { update = {}; } + + KOKKOS_FUNCTION + void join(ValueType& update, const ValueType& input) const { + update = m_binary_op(update, input); + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp index 228390bdff..157de1125e 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -41,32 +41,65 @@ struct StdGenerateFunctor { : m_first(std::move(_first)), m_generator(std::move(_g)) {} }; +// +// generate impl +// template -void generate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { +void generate_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Generator g) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdGenerateFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, g)); + StdGenerateFunctor(first, g)); ex.fence("Kokkos::generate: fence after operation"); } +template +KOKKOS_FUNCTION void generate_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdGenerateFunctor(first, g)); + teamHandle.team_barrier(); +} + +// +// generate_n impl +// template -IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { +IteratorType generate_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, Size count, + Generator g) { + if (count <= 0) { + return first; + } + + generate_exespace_impl(label, ex, first, first + count, g); + return first + count; +} + +template +KOKKOS_FUNCTION IteratorType +generate_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + Size count, Generator g) { if (count <= 0) { return first; } - generate_impl(label, ex, first, first + count, g); + generate_team_impl(teamHandle, first, first + count, g); return first + count; } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index ecd6ff39cd..0b4acec0fe 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -101,9 +101,12 @@ struct InclusiveScanDefaultFunctor { } }; +// +// exespace impl +// template -OutputIteratorType inclusive_scan_default_op_impl( +OutputIteratorType inclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { @@ -143,7 +146,7 @@ OutputIteratorType inclusive_scan_default_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { @@ -158,7 +161,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( using value_type = std::remove_const_t; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanNoInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -179,7 +182,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, @@ -193,7 +196,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanWithInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,13 +206,142 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, - unary_op_type(), init_value)); + unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // return return first_dest + num_elements; } +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + // #if defined(KOKKOS_ENABLE_CUDA) + + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = std::conditional_t< + ::Kokkos::is_detected::value, + InclusiveScanDefaultFunctorForKnownIdentityElement< + exe_space, index_type, value_type, InputIteratorType, + OutputIteratorType>, + InclusiveScanDefaultFunctor>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest)); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanNoInitValueFunctor< + exe_space, value_type, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op_type())); + teamHandle.team_barrier(); + + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl with init_value +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op, ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanWithInitValueFunctor< + exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, + unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, + unary_op_type(), std::move(init_value))); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 0fe2d246ff..281efca36b 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -62,9 +62,9 @@ struct StdIsPartitionedFunctor { }; template -bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { +bool is_partitioned_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { // true if all elements in the range [first, last) that satisfy // the predicate "pred" appear before all elements that don't. // Also returns true if [first, last) is empty. @@ -97,6 +97,7 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar @@ -109,8 +110,72 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; + } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true + return true; + } else { + return false; + } +} + +template +KOKKOS_FUNCTION bool is_partitioned_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + PredicateType pred) { + /* see exespace impl for the description of the impl */ + + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // trivial case + if (first == last) { + return true; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdIsPartitioned; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdIsPartitionedFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr index_type red_id_min = + ::Kokkos::reduction_identity::min(); + constexpr index_type red_id_max = + ::Kokkos::reduction_identity::max(); + + if (red_result.max_loc_true != red_id_max && + red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values + return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true return true; } else { return false; diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp index 4696821586..b2c912848a 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -48,10 +48,13 @@ struct StdIsSortedFunctor { : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} }; +// +// exespace impl +// template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { +bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -75,11 +78,49 @@ bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, } template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { +bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements <= 1) { + return true; + } + + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + + // result is incremented by one if sorting breaks at index i + std::size_t result = 0; + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, num_elements_minus_one), + // use CTAD here + StdIsSortedFunctor(first, std::move(comp)), result); + + return result == 0; +} + +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_impl(label, ex, first, last, pred_t()); + return is_sorted_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index 2a0c112bf5..d33580ca53 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -54,10 +54,15 @@ struct StdIsSortedUntilFunctor { m_reducer(std::move(reducer)) {} }; +// +// overloads accepting exespace +// template -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -81,7 +86,6 @@ IteratorType is_sorted_until_impl(const std::string& label, label, // use num_elements-1 because each index handles i and i+1 RangePolicy(ex, 0, num_elements - 1), - // use CTAD StdIsSortedUntilFunctor(first, comp, reducer), reducer); /* If the reduction result is equal to the initial value, @@ -98,12 +102,66 @@ IteratorType is_sorted_until_impl(const std::string& label, } template -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_until_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// overloads accepting team handle +// +template +KOKKOS_FUNCTION IteratorType +is_sorted_until_team_impl(const ExecutionSpace& teamHandle, IteratorType first, + IteratorType last, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + // trivial case + if (num_elements <= 1) { + return last; + } + + /* + Do a par_reduce computing the *min* index that breaks the sorting. + If one such index is found, then the range is sorted until that element, + if no such index is found, then it means the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type red_result; + index_type red_result_init; + ::Kokkos::Min reducer(red_result); + reducer.init(red_result_init); + ::Kokkos::parallel_reduce( // use num_elements-1 because each index handles i + // and i+1 + TeamThreadRange(teamHandle, 0, num_elements - 1), + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + teamHandle.team_barrier(); + + /* If the reduction result is equal to the initial value, + and it means the range is sorted until the end */ + if (red_result == red_result_init) { + return last; + } else { + /* If such index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (red_result + 1); + } +} + +template +KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl( + const ExecutionSpace& teamHandle, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_until_impl(label, ex, first, last, pred_t()); + return is_sorted_until_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index ad7f59232e..b95a66c3bd 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -84,13 +84,15 @@ struct StdLexicographicalCompareFunctor { m_comparator(std::move(_comp)) {} }; +// +// exespace impl +// template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType comp) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); @@ -139,16 +141,84 @@ bool lexicographical_compare_impl(const std::string& label, } template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { + using value_type_1 = typename IteratorType1::value_type; + using value_type_2 = typename IteratorType2::value_type; + using predicate_t = + Impl::StdAlgoLessThanBinaryPredicate; + return lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, + last2, predicate_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + // run + const auto d1 = Kokkos::Experimental::distance(first1, last1); + const auto d2 = Kokkos::Experimental::distance(first2, last2); + const auto range = Kokkos::min(d1, d2); + reduction_value_type red_result; + reducer_type reducer(red_result); + using func1_t = + StdLexicographicalCompareFunctor; + + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, range), + func1_t(first1, first2, reducer, comp), reducer); + + teamHandle.team_barrier(); + + // no mismatch + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + auto new_last1 = first1 + range; + auto new_last2 = first2 + range; + bool is_prefix = (new_last1 == last1) && (new_last2 != last2); + return is_prefix; + } + + // check mismatched + int less = 0; + auto it1 = first1 + red_result.min_loc_true; + auto it2 = first2 + red_result.min_loc_true; + using func2_t = StdCompareFunctor; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, 1), + func2_t(it1, it2, comp), less); + + teamHandle.team_barrier(); + + return static_cast(less); +} + +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; - return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, - predicate_t()); + return lexicographical_compare_team_impl(teamHandle, first1, last1, first2, + last2, predicate_t()); } } // namespace Impl diff --git a/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp index 048420f7a8..2f51db03b4 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -63,12 +63,16 @@ struct StdMinMaxElemFunctor { : m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; +// +// exespace impl +// template