-
Notifications
You must be signed in to change notification settings - Fork 6
added forall_with_streams and updated BenchmarkForall.cpp #232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
neelakausik
wants to merge
19
commits into
develop
Choose a base branch
from
feature/kausik1/streams
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
c8b629c
forall_with_streams and updated BenchmarkForall.cpp
d7f25c9
branch wasn't up to date
26fbe1b
multiple kernels
291fd65
care macros
c31c924
try to fix cuda error
c42abc2
Merge branch 'develop' into feature/kausik1/streams
adayton1 533e0a4
fixed indentation and macros
5d3a07b
legacy macros
131dc67
warmup kernels and multiple arrays
0459805
fixed loop size
4915fb4
added CARE_DLL to PluginData.h
bea5259
Revert "added CARE_DLL to PluginData.h"
1f159f1
addressed comments
ada1508
omp parallel for
aa90113
try to make benchmarks more similar
65ce8b0
set num threads
25b8885
forall resource overload
f761e5f
N threads
e28bfb5
resource template parameter
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| ////////////////////////////////////////////////////////////////////////////////////// | ||
| // Copyright 2020 Lawrence Livermore National Security, LLC and other CARE developers. | ||
| // See the top-level LICENSE file for details. | ||
| // | ||
| // SPDX-License-Identifier: BSD-3-Clause | ||
| ////////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| // CARE headers | ||
| #include "care/DefaultMacros.h" | ||
| #include "care/host_device_ptr.h" | ||
| #include "care/forall.h" | ||
| #include "care/policies.h" | ||
| #include "RAJA/RAJA.hpp" | ||
|
|
||
| // Other library headers | ||
| #include <benchmark/benchmark.h> | ||
| #include <omp.h> | ||
|
|
||
| // Std library headers | ||
| #include <climits> | ||
| #include <cmath> | ||
|
|
||
// Number of elements in each benchmark array, which is also the trip count
// of every kernel launched below. A typed constant is used instead of a
// preprocessor macro so that the token `size` is not rewritten wherever it
// appears later in this translation unit (for example in `.size()` calls).
constexpr int size = 1000000;
|
|
||
| #if defined(CARE_GPUCC) | ||
| //each kernel has a separate stream | ||
| static void benchmark_gpu_loop_separate_streams(benchmark::State& state) { | ||
| int N = state.range(0); | ||
| care::Resource res_arr[16]; | ||
| RAJA::resources::Event event_arr[16]; | ||
| care::host_device_ptr<int> arrays[16]; | ||
| for(int i = 0; i < N; i++) | ||
| { | ||
| res_arr[i] = care::Resource(); | ||
| event_arr[i] = res_arr[i].get_event(); | ||
| arrays[i] = care::host_device_ptr<int>(size, "arr"); | ||
| } | ||
|
|
||
| //warmup kernel | ||
| CARE_GPU_LOOP(i, 0 , size) { | ||
| arrays[0][i] = 0; | ||
| } CARE_GPU_LOOP_END | ||
|
|
||
| care::gpuDeviceSynchronize(__FILE__, __LINE__); | ||
|
|
||
| for (auto _ : state) { | ||
adayton1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| //run num kernels | ||
| omp_set_num_threads(N); | ||
| #pragma omp parallel for | ||
| for(int j = 0; j < N; j++) | ||
adayton1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| CARE_STREAMED_LOOP(res_arr[j], i, 0 , size) { | ||
| arrays[j][i] = sqrtf(i) + cosf(j) * powf(i, j); | ||
| } CARE_STREAMED_LOOP_END | ||
| } | ||
adayton1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| care::gpuDeviceSynchronize(__FILE__, __LINE__); | ||
| } | ||
|
|
||
| for(int i = 0; i < N; i++){ | ||
| arrays[i].free(); | ||
| } | ||
| } | ||
|
|
||
| // Register the separate-streams benchmark; each Arg value is the kernel count N read via state.range(0) | ||
| BENCHMARK(benchmark_gpu_loop_separate_streams)->Arg(1)->Arg(2)->Arg(4)->Arg(8)->Arg(12)->Arg(16); | ||
|
|
||
| //all kernels on one stream | ||
| static void benchmark_gpu_loop_single_stream(benchmark::State& state) { | ||
| int N = state.range(0); | ||
|
|
||
| care::host_device_ptr<int> arrays[16]; | ||
| for(int i = 0; i < N; i++) | ||
| { | ||
| arrays[i] = care::host_device_ptr<int>(size, "arr"); | ||
| } | ||
|
|
||
| //warmup kernel | ||
| CARE_GPU_LOOP(i, 0, size) { | ||
| arrays[0][i] = 0; | ||
| } CARE_GPU_LOOP_END | ||
|
|
||
| care::gpuDeviceSynchronize(__FILE__, __LINE__); | ||
|
|
||
| for (auto _ : state) { | ||
| //run num kernels | ||
| for(int j = 0; j < N; j++) | ||
| { | ||
| CARE_GPU_LOOP(i, 0, size) { | ||
| arrays[j][i] = sqrtf(i) + cosf(j) * powf(i, j); | ||
| } CARE_GPU_LOOP_END | ||
| } | ||
| care::gpuDeviceSynchronize(__FILE__, __LINE__); | ||
| } | ||
|
|
||
| for(int i = 0; i < N; i++){ | ||
| arrays[i].free(); | ||
| } | ||
| } | ||
|
|
||
| // Register the single-stream benchmark with the same kernel counts N (read via state.range(0)) as the separate-streams benchmark | ||
| BENCHMARK(benchmark_gpu_loop_single_stream)->Arg(1)->Arg(2)->Arg(4)->Arg(8)->Arg(12)->Arg(16); | ||
|
|
||
| #endif | ||
|
|
||
| // Google Benchmark macro that provides main() and runs every benchmark registered above | ||
| BENCHMARK_MAIN(); | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.