-
Notifications
You must be signed in to change notification settings - Fork 6
added forall_with_streams and updated BenchmarkForall.cpp #232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 6 commits
c8b629c
d7f25c9
26fbe1b
291fd65
c31c924
c42abc2
533e0a4
5d3a07b
131dc67
0459805
4915fb4
bea5259
1f159f1
ada1508
aa90113
65ce8b0
25b8885
f761e5f
e28bfb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| ////////////////////////////////////////////////////////////////////////////////////// | ||
| // Copyright 2020 Lawrence Livermore National Security, LLC and other CARE developers. | ||
| // See the top-level LICENSE file for details. | ||
| // | ||
| // SPDX-License-Identifier: BSD-3-Clause | ||
| ////////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| // CARE headers | ||
| #include "care/DefaultMacros.h" | ||
| #include "care/host_device_ptr.h" | ||
| #include "care/forall.h" | ||
| #include "care/policies.h" | ||
| #include "RAJA/RAJA.hpp" | ||
|
|
||
| // Other library headers | ||
| #include <benchmark/benchmark.h> | ||
| #include <omp.h> | ||
|
|
||
| // Std library headers | ||
| #include <climits> | ||
| #include <cmath> | ||
|
|
||
| #define NUM_KERNELS 4 | ||
|
|
||
| using namespace care; | ||
|
|
||
| #if defined(CARE_GPUCC) | ||
| //each kernel has a separate stream | ||
| static void benchmark_gpu_loop_separate_streams(benchmark::State& state) { | ||
| int N = state.range(0); | ||
| const char * fileName = "test"; | ||
adayton1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| RAJA::resources::Cuda res_arr[NUM_KERNELS]; | ||
| RAJA::resources::Event event_arr[NUM_KERNELS]; | ||
| for(int i = 0; i < NUM_KERNELS; i++) | ||
| { | ||
| RAJA::resources::Cuda res; | ||
| res_arr[i] = res; | ||
| RAJA::resources::Event e = res.get_event(); | ||
| event_arr[i] = e; | ||
| } | ||
|
|
||
| care::host_device_ptr<int> arr(N, "arr"); | ||
| for (auto _ : state) { | ||
adayton1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| //run num kernels | ||
| for(int j = 0; j < NUM_KERNELS; j++) | ||
| { | ||
| CARE_STREAMED_LOOP(i, res_arr[j], 0 , N) { | ||
| arr[i] = i; | ||
| } CARE_STREAMED_LOOP_END | ||
|
|
||
| if(j > 0) res_arr[j].wait_for(&event_arr[j - 1]); | ||
| } | ||
| } | ||
| arr.free(); | ||
| } | ||
|
|
||
| // Register the function as a benchmark | ||
| BENCHMARK(benchmark_gpu_loop_separate_streams)->Range(1, INT_MAX); | ||
|
|
||
| //all kernels on one stream | ||
| static void benchmark_gpu_loop_single_stream(benchmark::State& state) { | ||
| int N = state.range(0); | ||
| const char * fileName = "test"; | ||
| RAJA::resources::Cuda res; | ||
|
|
||
| care::host_device_ptr<int> arr(N, "arr"); | ||
|
|
||
| RAJA::resources::Event e = res.get_event(); | ||
| for (auto _ : state) { | ||
| //run num kernels | ||
| for(int j = 0; j < NUM_KERNELS; j++) | ||
| { | ||
| CARE_STREAMED_LOOP(i, res, 0 , N) { | ||
| arr[i] = i; | ||
| }CARE_STREAMED_LOOP_END | ||
| res.wait(); | ||
| } | ||
| } | ||
| arr.free(); | ||
| } | ||
|
|
||
| // Register the function as a benchmark | ||
| BENCHMARK(benchmark_gpu_loop_single_stream)->Range(1, INT_MAX); | ||
|
|
||
| #endif | ||
|
|
||
| // Run the benchmarks | ||
| BENCHMARK_MAIN(); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -189,6 +189,49 @@ namespace care { | |
| #endif | ||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
| /// | ||
| /// @author Neela Kausik | ||
| /// | ||
| /// @brief If GPU is available, execute on the device. Otherwise, execute on | ||
| /// the host. This specialization is needed for clang-query. | ||
| /// | ||
| /// @arg[in] gpu Used to choose this overload of forall | ||
| /// @arg[in] res Resource provided for execution | ||
| /// @arg[in] fileName The name of the file where this function is called | ||
| /// @arg[in] lineNumber The line number in the file where this function is called | ||
| /// @arg[in] start The starting index (inclusive) | ||
| /// @arg[in] end The ending index (exclusive) | ||
| /// @arg[in] body The loop body to execute at each index | ||
| /// | ||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| #if defined(CARE_GPUCC) | ||
// Dispatches the loop body onto the device (or a fallback) using the
// caller-supplied CUDA resource, so concurrent loops can run on distinct
// streams. fileName/lineNumber are forwarded only on the simulation/seq
// paths; the device paths launch via the RAJA policy + resource directly.
template <typename LB>
void forall_with_stream(gpu, RAJA::resources::Cuda res, const char * fileName, const int lineNumber,
                        const int start, const int end, LB&& body) {
#if CARE_ENABLE_PARALLEL_LOOP_BACKWARDS
   // Flip global iteration-order flag for the duration of this launch.
   s_reverseLoopOrder = true;
#endif

#if CARE_ENABLE_GPU_SIMULATION_MODE
   forall(gpu_simulation{}, res, fileName, lineNumber, start, end, std::forward<LB>(body));
#elif defined(__CUDACC__)
   // NOTE(review): this uses the RAJA-style signature (policy, resource,
   // range, body), unlike the care-style (fileName, lineNumber, start, end)
   // signature used by the sim/seq branches above/below — confirm the
   // intended overload resolves here; fileName/lineNumber are unused on
   // this path.
   forall(RAJA::cuda_exec<CARE_CUDA_BLOCK_SIZE, CARE_CUDA_ASYNC>{},
          res, RAJA::RangeSegment(start, end), std::forward<LB>(body));
#elif defined(__HIPCC__)
   // NOTE(review): reuses the CUDA-named block-size/async macros for the
   // HIP policy — presumably intentional (CARE defines them generically);
   // verify.
   forall(RAJA::hip_exec<CARE_CUDA_BLOCK_SIZE, CARE_CUDA_ASYNC>{},
          res, RAJA::RangeSegment(start, end), std::forward<LB>(body));
#else
   forall(RAJA::seq_exec{}, res, fileName, lineNumber, start, end, std::forward<LB>(body));
#endif

#if CARE_ENABLE_PARALLEL_LOOP_BACKWARDS
   // Restore default iteration order.
   s_reverseLoopOrder = false;
#endif
}
| #endif | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
| /// | ||
| /// @author Alan Dayton | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.