Skip to content

Commit 7bf4e8c

Browse files
authored
Merge pull request #248 from LLNL/pr-from-fork/206
Pr for Basic Kokkos
2 parents a6ff66a + 6add081 commit 7bf4e8c

44 files changed

Lines changed: 1050 additions & 9 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@
44
[submodule "tpl/RAJA"]
55
path = tpl/RAJA
66
url = https://github.com/LLNL/RAJA.git
7+
[submodule "tpl/kokkos"]
8+
path = tpl/kokkos
9+
url = https://github.com/kokkos/kokkos

CMakeLists.txt

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.14.5)
1313

1414
option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable
1515
this, and all other variants, to run _only_ raw C loops." On)
16+
option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off)
1617

1718
#
1819
# Note: the BLT build system is inherited by RAJA and is initialized by RAJA
@@ -22,8 +23,13 @@ if (PERFSUITE_ENABLE_WARNINGS)
2223
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
2324
endif()
2425

25-
set(CMAKE_CXX_STANDARD 14)
26-
set(BLT_CXX_STD c++14)
26+
if(ENABLE_KOKKOS)
27+
set(CMAKE_CXX_STANDARD 17)
28+
set(BLT_CXX_STD c++17)
29+
else()
30+
set(CMAKE_CXX_STANDARD 14)
31+
set(BLT_CXX_STD c++14)
32+
endif()
2733

2834
include(blt/SetupBLT.cmake)
2935

@@ -100,7 +106,12 @@ endif()
100106
if (ENABLE_CUDA)
101107
list(APPEND RAJA_PERFSUITE_DEPENDS cuda)
102108
endif()
103-
if (ENABLE_HIP)
109+
110+
# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU
111+
# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler.
112+
# Separate RAJAPerf Suite and Kokkos handling of HIP compilers
113+
114+
if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS))
104115
message(STATUS "HIP version: ${hip_VERSION}")
105116
if("${hip_VERSION}" VERSION_LESS "3.5")
106117
message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. ")
@@ -113,8 +124,13 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE})
113124
set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME})
114125

115126
if (ENABLE_CUDA)
116-
set(CMAKE_CUDA_STANDARD 14)
117-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
127+
if (ENABLE_KOKKOS)
128+
set(CMAKE_CUDA_STANDARD 17)
129+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --extended-lambda --expt-relaxed-constexpr")
130+
else()
131+
set(CMAKE_CUDA_STANDARD 14)
132+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
133+
endif()
118134

119135
set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}")
120136
list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER})
@@ -135,13 +151,46 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in
135151

136152
include_directories($<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>)
137153

138-
# Make sure RAJA flag propagate (we need to do some house cleaning to
154+
# Make sure RAJA flags propagate (we need to do some tidying to
139155
# remove project-specific CMake variables that are no longer needed)
140156
set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS})
141157

142158
#
143159
# Each directory in the perf suite has its own CMakeLists.txt file.
144-
#
160+
161+
# ENABLE_KOKKOS is a RAJAPerf Suite option
162+
if(ENABLE_KOKKOS)
163+
add_definitions(-DRUN_KOKKOS)
164+
if(ENABLE_HIP)
165+
set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the
166+
Kokkos_ENABLE_HIP variable to ON")
167+
endif()
168+
169+
if(ENABLE_TARGET_OPENMP)
170+
set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring")
171+
if(NOT CMAKE_BUILD_TYPE MATCHES Debug)
172+
if(NOT EXPERIMENTAL_BUILD)
173+
message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON")
174+
endif()
175+
endif()
176+
endif()
177+
178+
# ENABLE_CUDA is a RAJAPerf Suite option
179+
if(ENABLE_CUDA)
180+
set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring")
181+
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring")
182+
enable_language(CUDA)
183+
endif()
184+
if(ENABLE_OPENMP)
185+
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Docstring")
186+
endif()
187+
188+
add_subdirectory(tpl/kokkos)
189+
get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES)
190+
include_directories(${KOKKOS_INCLUDE_DIRS})
191+
list(APPEND RAJA_PERFSUITE_DEPENDS kokkos)
192+
endif()
193+
145194
add_subdirectory(src)
146195

147196
if (RAJA_PERFSUITE_ENABLE_TESTS)

src/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
include_directories(.)
1010

1111
add_subdirectory(common)
12-
add_subdirectory(apps)
1312
add_subdirectory(basic)
13+
add_subdirectory(basic-kokkos)
14+
add_subdirectory(apps)
1415
add_subdirectory(lcals)
1516
add_subdirectory(polybench)
1617
add_subdirectory(stream)
@@ -20,6 +21,7 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS
2021
common
2122
apps
2223
basic
24+
basic-kokkos
2325
lcals
2426
polybench
2527
stream

src/RAJAPerfSuiteDriver.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
// SPDX-License-Identifier: (BSD-3-Clause)
77
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
88

9+
#ifdef RUN_KOKKOS
10+
#include <Kokkos_Core.hpp>
11+
#endif
12+
913
#include "common/Executor.hpp"
1014

1115
#include <iostream>
@@ -24,6 +28,9 @@ int main( int argc, char** argv )
2428
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
2529
rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl;
2630
#endif
31+
#ifdef RUN_KOKKOS
32+
Kokkos::initialize(argc, argv);
33+
#endif
2734

2835
// STEP 1: Create suite executor object
2936
rajaperf::Executor executor(argc, argv);
@@ -43,6 +50,9 @@ int main( int argc, char** argv )
4350

4451
rajaperf::getCout() << "\n\nDONE!!!...." << std::endl;
4552

53+
#ifdef RUN_KOKKOS
54+
Kokkos::finalize();
55+
#endif
4656
#ifdef RAJA_PERFSUITE_ENABLE_MPI
4757
MPI_Finalize();
4858
#endif

src/basic-kokkos/CMakeLists.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
###############################################################################
2+
# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
3+
# and RAJA Performance Suite project contributors.
4+
# See the RAJAPerf/COPYRIGHT file for details.
5+
#
6+
# SPDX-License-Identifier: (BSD-3-Clause)
7+
###############################################################################
8+
9+
#include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic)
10+
11+
blt_add_library(
12+
NAME basic-kokkos
13+
SOURCES
14+
PI_ATOMIC-Kokkos.cpp
15+
DAXPY-Kokkos.cpp
16+
IF_QUAD-Kokkos.cpp
17+
INIT3-Kokkos.cpp
18+
INIT_VIEW1D-Kokkos.cpp
19+
INIT_VIEW1D_OFFSET-Kokkos.cpp
20+
MULADDSUB-Kokkos.cpp
21+
NESTED_INIT-Kokkos.cpp
22+
REDUCE3_INT-Kokkos.cpp
23+
TRAP_INT-Kokkos.cpp
24+
DAXPY_ATOMIC-Kokkos.cpp
25+
INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic
26+
DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
27+
)

src/basic-kokkos/DAXPY-Kokkos.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
2+
// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
3+
// and RAJA Performance Suite project contributors.
4+
// See the RAJAPerf/COPYRIGHT file for details.
5+
//
6+
// SPDX-License-Identifier: (BSD-3-Clause)
7+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
8+
9+
#include "DAXPY.hpp"
10+
#if defined(RUN_KOKKOS)
11+
#include "common/KokkosViewUtils.hpp"
12+
#include <iostream>
13+
14+
namespace rajaperf {
15+
namespace basic {
16+
17+
struct DaxpyFunctor {
18+
Real_ptr x;
19+
Real_ptr y;
20+
Real_type a;
21+
DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a)
22+
: x(m_x), y(m_y), a(m_a) {}
23+
void operator()(Index_type i) const { DAXPY_BODY; }
24+
};
25+
26+
void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
27+
const Index_type run_reps = getRunReps();
28+
const Index_type ibegin = 0;
29+
const Index_type iend = getActualProblemSize();
30+
31+
DAXPY_DATA_SETUP;
32+
33+
auto x_view = getViewFromPointer(x, iend);
34+
auto y_view = getViewFromPointer(y, iend);
35+
36+
switch (vid) {
37+
38+
case Kokkos_Lambda: {
39+
40+
Kokkos::fence();
41+
startTimer();
42+
43+
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
44+
Kokkos::parallel_for(
45+
"DAXPY-Kokkos Kokkos_Lambda",
46+
Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
47+
KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; });
48+
}
49+
50+
Kokkos::fence();
51+
stopTimer();
52+
53+
break;
54+
}
55+
default: {
56+
std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl;
57+
}
58+
}
59+
60+
// Move data (i.e., pointer, KokkosView-wrapped pointer) back to the host from
61+
// the device
62+
63+
moveDataToHostFromKokkosView(x, x_view, iend);
64+
moveDataToHostFromKokkosView(y, y_view, iend);
65+
}
66+
67+
} // end namespace basic
68+
} // end namespace rajaperf
69+
#endif
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
2+
// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
3+
// and RAJA Performance Suite project contributors.
4+
// See the RAJAPerf/LICENSE file for details.
5+
//
6+
// SPDX-License-Identifier: (BSD-3-Clause)
7+
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
8+
9+
#include "DAXPY_ATOMIC.hpp"
10+
#if defined(RUN_KOKKOS)
11+
#include "common/KokkosViewUtils.hpp"
12+
#include <iostream>
13+
14+
// Delete me
15+
// For debugging:
16+
#include "RAJA/RAJA.hpp"
17+
18+
namespace rajaperf {
19+
namespace basic {
20+
21+
void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
22+
{
23+
24+
const Index_type run_reps = getRunReps();
25+
const Index_type ibegin = 0;
26+
const Index_type iend = getActualProblemSize();
27+
28+
DAXPY_ATOMIC_DATA_SETUP;
29+
//
30+
// Kokkos Views to wrap pointers declared in DAXPY_ATOMIC.hpp
31+
//
32+
33+
auto x_view = getViewFromPointer(x, iend);
34+
auto y_view = getViewFromPointer(y, iend);
35+
36+
switch (vid) {
37+
38+
case Kokkos_Lambda: {
39+
40+
Kokkos::fence();
41+
startTimer();
42+
43+
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
44+
45+
Kokkos::parallel_for(
46+
"DAXPY_ATOMIC_Kokkos Kokkos_Lambda",
47+
Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
48+
KOKKOS_LAMBDA(Index_type i) {
49+
Kokkos::atomic_add(&y_view[i], a * x_view[i]);
50+
});
51+
}
52+
53+
Kokkos::fence();
54+
stopTimer();
55+
56+
break;
57+
}
58+
59+
default: {
60+
getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
61+
}
62+
}
63+
64+
moveDataToHostFromKokkosView(x, x_view, iend);
65+
moveDataToHostFromKokkosView(y, y_view, iend);
66+
}
67+
68+
} // end namespace basic
69+
} // end namespace rajaperf
70+
#endif

0 commit comments

Comments
 (0)