Merge pull request #63 from LLNL/kab163/tutorial-06-edits

kab163 · web-flow · commit 898d94de9a89 · 2025-07-22T15:48:08.000-07:00
Update lesson 6 (sequential host forall on a_h/b_h, README file names 07 -> 06) and the top-level README CUDA build line (C++14 -> C++17)
diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp
@@ -29,12 +29,11 @@ int main()
   a_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double)));
   b_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double)));
 
-  //TODO: fill in the forall statement with the CUDA execution policy.
-  //TODO: and its block size argument. Then be sure to use RAJA_DEVICE
-  RAJA::forall< ????? < ?????> >(
-    RAJA::TypedRangeSegment<int>(0, N), [=] ????? (int i) {
-      a[i] = 1.0;
-      b[i] = 1.0;
+  //TODO: fill in the forall statement with the sequential execution policy.
+  RAJA::forall< ????? >(
+    RAJA::TypedRangeSegment<int>(0, N), [=] (int i) {
+      a_h[i] = 1.0;
+      b_h[i] = 1.0;
     }
   );
 
diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp
@@ -28,10 +28,9 @@ int main()
   a_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double)));
   b_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double)));
 
-  //TODO: fill in the forall statement with the CUDA execution policy.
-  //TODO: and its block size argument. Then be sure to use RAJA_DEVICE
-  RAJA::forall< RAJA::cuda_exec<CUDA_BLOCK_SIZE>>(
-    RAJA::TypedRangeSegment<int>(0, N), [=] RAJA_DEVICE (int i) {
+  //TODO: fill in the forall statement with the sequential execution policy.
+  RAJA::forall<RAJA::seq_exec>(
+    RAJA::TypedRangeSegment<int>(0, N), [=] (int i) {
       a_h[i] = 1.0;
       b_h[i] = 1.0;
     }
diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md b/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md
@@ -37,7 +37,7 @@ memory resources at the bottom of this README!
 Now, let's learn how to use Umpire's operations to copy data
 between CPU and GPU memory in a portable way, using Umpire's memory resources.
 
-In `07_raja_umpire_host_device.cpp`, we create an allocator for the GPU with:
+In `06_raja_umpire_host_device.cpp`, we create an allocator for the GPU with:
 ```  
 auto allocator = rm.getAllocator("DEVICE");
 ```
@@ -66,7 +66,7 @@ void umpire::ResourceManager::copy (void* dst_ptr, void * src_ptr, std::size_t s
 
 *Note:* The destination is the first argument.
 
-In the file `07_raja_umpire_host_device.cpp`, there is a `TODO` comment where you should insert two copy
+In the file `06_raja_umpire_host_device.cpp`, there is a `TODO` comment where you should insert two copy
 calls to copy data from the CPU memory to the DEVICE memory.
 
 You will also find that we are adjusting the `RAJA::forall` to now work on the GPU.
@@ -88,8 +88,8 @@ be sure to check out the links at the bottom of this README.
 When you are done editing the file, compile and run it:
 
 ```
-$ make 07_raja_umpire_host_device
-$ ./bin/07_raja_umpire_host_device
+$ make 06_raja_umpire_host_device
+$ ./bin/06_raja_umpire_host_device
 ```
 Want to learn more about Umpire memory resources? Check out the list below! You can also learn 
 more by going to our online documentation on 
diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ cmake -DCMAKE_CXX_COMPILER=g++ -DBLT_CXX_STD=c++17 -DENABLE_CUDA=Off -DENABLE_OP
 module load cmake/3.23.1
 module load gcc/8.3.1
 module load cuda/11.2.0
-cmake -DBLT_CXX_STD=c++14 -DENABLE_CUDA=On -DENABLE_OPENMP=On -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-11.2.0/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-11.2.0 -DCMAKE_CUDA_FLAGS=--extended-lambda -DRAJA_ENABLE_EXERCISES=Off -DCMAKE_BUILD_TYPE=Release ..
+cmake -DBLT_CXX_STD=c++17 -DENABLE_CUDA=On -DENABLE_OPENMP=On -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-11.2.0/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-11.2.0 -DCMAKE_CUDA_FLAGS=--extended-lambda -DRAJA_ENABLE_EXERCISES=Off -DCMAKE_BUILD_TYPE=Release ..
 ```
 
 License