Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <iostream>

#include "RAJA/RAJA.hpp"
#include "umpire/Umpire.hpp"
#include "umpire/strategy/QuickPool.hpp"

// Transposes an N x M matrix of doubles into an M x N matrix with a RAJA
// kernel run under a CUDA execution policy. Both matrices are allocated from
// an Umpire QuickPool and released before returning 0.
int main()
{
constexpr int N{10000};
constexpr int M{7000};
double* a{nullptr};
double* a_t{nullptr};

auto& rm = umpire::ResourceManager::getInstance();

// Build a QuickPool named "POOL" on top of the "UM" allocator
// (presumably Umpire's unified-memory resource -- confirm against the Umpire docs).
auto allocator = rm.getAllocator("UM");
auto pool = rm.makeAllocator<umpire::strategy::QuickPool>("POOL", allocator);

// Each buffer holds N*M doubles.
// NOTE(review): nothing writes to `a` before the kernel below reads it.
a = static_cast<double *>(pool.allocate(N*M*sizeof(double)));
a_t = static_cast<double *>(pool.allocate(N*M*sizeof(double)));

constexpr int DIM = 2;

// A indexes `a` with extents (N, M); A_t indexes `a_t` with extents (M, N).
RAJA::View<double, RAJA::Layout<DIM>> A(a, N, M);
RAJA::View<double, RAJA::Layout<DIM>> A_t(a_t, M, N);

// Iteration spaces: rows cover [0, N), columns cover [0, M).
RAJA::TypedRangeSegment<int> row_range(0, N);
RAJA::TypedRangeSegment<int> col_range(0, M);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is the TODO part of the exercise?


// Kernel policy: For<1, ...> applies to tuple entry 1 (row_range) and
// For<0, ...> to tuple entry 0 (col_range), matching the make_tuple order
// used in the RAJA::kernel call below. The template arguments 8 and 32 are
// the sizes passed to the y and x CUDA policies respectively.
using EXEC_POL =
RAJA::KernelPolicy<
RAJA::statement::CudaKernel<
RAJA::statement::For<1, RAJA::cuda_global_size_y_loop<8>,
RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<32>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we will have to explain the block sizes at the tutorial?

RAJA::statement::Lambda<0>
>
>
>
>;

// The lambda receives (col, row) in tuple order and writes element (row, col)
// of A into position (col, row) of A_t. The views are captured by value.
RAJA::kernel<EXEC_POL>(RAJA::make_tuple(col_range, row_range),
[=] RAJA_DEVICE (int col, int row) {
A_t(col, row) = A(row, col);
});

// Hand both buffers back to the pool they were allocated from.
pool.deallocate(a);
pool.deallocate(a_t);

return 0;
}
4 changes: 4 additions & 0 deletions Intro_Tutorial/lessons/11_raja_device_kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,8 @@ if (ENABLE_CUDA)
NAME 11_raja_device_kernel
SOURCES 11_raja_device_kernel.cpp
DEPENDS_ON RAJA umpire cuda)
# Transpose example: builds the 11_raja_transpose_kernel executable from
# 11_raja_transpose_kernel.cpp, depending on RAJA, umpire and cuda.
blt_add_executable(
NAME 11_raja_transpose_kernel
SOURCES 11_raja_transpose_kernel.cpp
DEPENDS_ON RAJA umpire cuda)
endif()
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <iostream>

#include "RAJA/RAJA.hpp"
#include "umpire/Umpire.hpp"
#include "umpire/strategy/QuickPool.hpp"

int main()
{
constexpr int N{10000};
constexpr int M{7000};
double* a{nullptr};
double* a_t{nullptr};

auto& rm = umpire::ResourceManager::getInstance();

auto allocator = rm.getAllocator("UM");
auto pool = rm.makeAllocator<umpire::strategy::QuickPool>("POOL", allocator);

a = static_cast<double *>(pool.allocate(N*M*sizeof(double)));
a_t = static_cast<double *>(pool.allocate(N*M*sizeof(double)));

constexpr int DIM = 2;

RAJA::View<double, RAJA::Layout<DIM>> A(a, N, M);
RAJA::View<double, RAJA::Layout<DIM>> A_t(a_t, M, N);

RAJA::TypedRangeSegment<int> row_range(0, N);
RAJA::TypedRangeSegment<int> col_range(0, M);

using EXEC_POL =
RAJA::KernelPolicy<
RAJA::statement::CudaKernel<
RAJA::statement::For<1, RAJA::cuda_global_size_y_loop<8>,
RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<32>,
RAJA::statement::Lambda<0>
>
>
>
>;

RAJA::kernel<EXEC_POL>(RAJA::make_tuple(col_range, row_range),
[=] RAJA_DEVICE (int col, int row) {
A_t(col, row) = A(row, col);
});

pool.deallocate(a);
pool.deallocate(a_t);

return 0;
}