@@ -324,14 +324,14 @@ They can be deduced from the device if present at CMake configuration time.
324
324
325
325
</summary>
326
326
327
- | Option | Architecture |
328
- | --------------------------------| -----------------------|
329
- | ` -DKokkos_ARCH_INTEL_GEN=ON ` | Generic JIT |
330
- | ` -DKokkos_ARCH_INTEL_XEHP=ON ` | Xe-HP |
331
- | ` -DKokkos_ARCH_INTEL_PVC=ON ` | GPU Max/ Ponte Vecchio |
332
- | ` -DKokkos_ARCH_INTEL_DG1=ON ` | Iris XeMAX |
333
- | ` -DKokkos_ARCH_INTEL_GEN12=ON ` | Gen12 |
334
- | ` -DKokkos_ARCH_INTEL_GEN11=ON ` | Gen11 |
327
+ | Option | Architecture |
328
+ | --------------------------------| ------------------------- |
329
+ | ` -DKokkos_ARCH_INTEL_GEN=ON ` | Generic JIT |
330
+ | ` -DKokkos_ARCH_INTEL_XEHP=ON ` | Xe-HP |
331
+ | ` -DKokkos_ARCH_INTEL_PVC=ON ` | GPU Max (Ponte Vecchio) |
332
+ | ` -DKokkos_ARCH_INTEL_DG1=ON ` | Iris XeMAX |
333
+ | ` -DKokkos_ARCH_INTEL_GEN12=ON ` | Gen12 |
334
+ | ` -DKokkos_ARCH_INTEL_GEN11=ON ` | Gen11 |
335
335
336
336
<!-- #ifndef PRINT-->
337
337
@@ -364,10 +364,6 @@ They can be deduced from the device if present at CMake configuration time.
364
364
| ` -DKokkos_ARCH_MAXWELL53=ON ` | Maxwell | 5.3 | |
365
365
| ` -DKokkos_ARCH_MAXWELL52=ON ` | Maxwell | 5.2 | M6, M60, M4, M40 |
366
366
| ` -DKokkos_ARCH_MAXWELL50=ON ` | Maxwell | 5.0 | M10 |
367
- | ` -DKokkos_ARCH_KEPLER37=ON ` | Kepler | 3.7 | K80 |
368
- | ` -DKokkos_ARCH_KEPLER35=ON ` | Kepler | 3.5 | K40, K20 |
369
- | ` -DKokkos_ARCH_KEPLER32=ON ` | Kepler | 3.2 | |
370
- | ` -DKokkos_ARCH_KEPLER30=ON ` | Kepler | 3.0 | K10 |
371
367
372
368
<!-- #ifndef PRINT-->
373
369
@@ -399,58 +395,68 @@ They can be deduced from the device if present at CMake configuration time.
399
395
cmake \
400
396
-B build \
401
397
-DCMAKE_BUILD_TYPE=Release \
402
- -DKokkos_ARCH_NATIVE =ON \
403
- -DKokkos_ENABLE_OPENMP =ON
398
+ -DKokkos_ENABLE_OPENMP =ON \
399
+ -DKokkos_ARCH_NATIVE =ON
404
400
```
405
401
406
- #### AMD MI250 GPU with HIP and OpenMP
402
+ #### AMD MI300A APU with HIP
407
403
408
404
``` sh
405
+ export HSA_XNACK=1
409
406
cmake \
410
407
-B build \
411
408
-DCMAKE_CXX_COMPILER=hipcc \
412
409
-DCMAKE_BUILD_TYPE=Release \
413
410
-DKokkos_ENABLE_HIP=ON \
414
- -DKokkos_ARCH_AMD_GFX90A=ON \
415
- -DKokkos_ENABLE_OPENMP=ON
411
+ -DKokkos_ARCH_AMD_GFX942_APU=ON
416
412
```
417
413
418
- #### NVIDIA A100 GPU with CUDA and OpenMP
414
+ The `HSA_XNACK=1` environment variable is required to access host allocations from the device.
415
+
416
+ #### AMD MI250 GPU with HIP
419
417
420
418
``` sh
421
419
cmake \
422
420
-B build \
421
+ -DCMAKE_CXX_COMPILER=hipcc \
423
422
-DCMAKE_BUILD_TYPE=Release \
424
- -DKokkos_ENABLE_CUDA=ON \
425
- -DKokkos_ARCH_AMPERE80=ON \
426
- -DKokkos_ENABLE_OPENMP=ON
423
+ -DKokkos_ENABLE_HIP=ON \
424
+ -DKokkos_ARCH_AMD_GFX90A=ON
427
425
```
428
426
429
- #### NVIDIA V100 GPU with CUDA and OpenMP
427
+ #### Intel GPU Max 1550 (Ponte Vecchio) with SYCL
430
428
431
429
``` sh
432
430
cmake \
433
431
-B build \
432
+ -DCMAKE_CXX_COMPILER=icpx \
434
433
-DCMAKE_BUILD_TYPE=Release \
435
- -DKokkos_ENABLE_CUDA =ON \
436
- -DKokkos_ARCH_VOLTA70 =ON \
437
- -DKokkos_ENABLE_OPENMP=ON
434
+ -DKokkos_ENABLE_SYCL =ON \
435
+ -DKokkos_ARCH_INTEL_PVC =ON \
436
+ -DCMAKE_CXX_FLAGS=" -fp-model=precise"
438
437
```
439
438
440
- #### Intel GPU Max/Ponte Vecchio GPU with SYCL and OpenMP
439
+ The last option (`-fp-model=precise`) controls the precision of math operators.
440
+
441
+ #### NVIDIA H100 GPU with CUDA
441
442
442
443
``` sh
443
444
cmake \
444
445
-B build \
445
- -DCMAKE_CXX_COMPILER=icpx \
446
446
-DCMAKE_BUILD_TYPE=Release \
447
- -DKokkos_ENABLE_SYCL=ON \
448
- -DKokkos_ARCH_INTEL_PVC=ON \
449
- -DKokkos_ENABLE_OPENMP=ON \
450
- -DCMAKE_CXX_FLAGS=" -fp-model=precise"
447
+ -DKokkos_ENABLE_CUDA=ON \
448
+ -DKokkos_ARCH_HOPPER90=ON
451
449
```
452
450
453
- Last option is for math operators precision.
451
+ #### NVIDIA A100 GPU with CUDA
452
+
453
+ ``` sh
454
+ cmake \
455
+ -B build \
456
+ -DCMAKE_BUILD_TYPE=Release \
457
+ -DKokkos_ENABLE_CUDA=ON \
458
+ -DKokkos_ARCH_AMPERE80=ON
459
+ ```
454
460
455
461
<!-- #ifndef PRINT-->
456
462
<img title="Code" alt="Code" src="./images/code_txt.svg" height="25"> For more code examples:
0 commit comments