Skip to content

Commit

Permalink
Merge pull request cupy#8924 from jakirkham/use_nvidia_cccl_2.8_nvrtc_fix
Browse files Browse the repository at this point in the history

Use NVIDIA CCCL 2.8 latest w/CUDA 12.3 fix
  • Loading branch information
kmaehashi authored and chainer-ci committed Feb 13, 2025
1 parent 0ecdc5d commit fa47710
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "third_party/cccl"]
path = third_party/cccl
- url = https://github.com/cupy/cccl.git
+ url = https://github.com/NVIDIA/cccl.git
[submodule "third_party/jitify"]
path = third_party/jitify
url = https://github.com/NVIDIA/jitify.git
Expand Down
12 changes: 6 additions & 6 deletions .pfnci/coverage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3249,7 +3249,9 @@ CuPy CI Test Coverage
-
* -
- cutensor
- 2
- 🚨
-
-
-
-
-
Expand All @@ -3270,8 +3272,6 @@ CuPy CI Test Coverage
-
-
-
- ✅
- ✅
-
-
-
Expand Down Expand Up @@ -3329,7 +3329,7 @@ CuPy CI Test Coverage
-
* -
- cutensor,cub
- 26
- 28
-
-
-
Expand All @@ -3350,8 +3350,8 @@ CuPy CI Test Coverage
- ✅
- ✅
- ✅
-
-
-
-
- ✅
- ✅
- ✅
Expand Down
2 changes: 1 addition & 1 deletion .pfnci/linux/tests/cuda123.multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ACTIONS="$(dirname $0)/actions"

export NVCC="ccache nvcc"

- export CUPY_ACCELERATORS="cutensor"
+ export CUPY_ACCELERATORS="cutensor,cub"

"$ACTIONS/build.sh"
export OMPI_ALLOW_RUN_AS_ROOT=1
Expand Down
2 changes: 1 addition & 1 deletion .pfnci/linux/tests/cuda123.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ACTIONS="$(dirname $0)/actions"

export NVCC="ccache nvcc"

- export CUPY_ACCELERATORS="cutensor"
+ export CUPY_ACCELERATORS="cutensor,cub"

"$ACTIONS/build.sh"
"$ACTIONS/unittest.sh" "not slow and not multi_gpu"
Expand Down
3 changes: 1 addition & 2 deletions .pfnci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@
mpi4py: null
cython: "0.29"
cuda-python: null
- env:CUPY_ACCELERATORS: "cutensor"
+ env:CUPY_ACCELERATORS: "cutensor,cub"
test: "unit"

# CUDA 12.3 (Multi-GPU) | Linux
Expand All @@ -332,7 +332,6 @@
target: "cuda123.multi"
mpi4py: "3"
test: "unit-multi"
- env:CUPY_ACCELERATORS: "cutensor"

# CUDA 12.4 | Linux
- project: "cupy.linux.cuda124"
Expand Down
1 change: 0 additions & 1 deletion .pfnci/windows/_flexci.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ function ActivateCUDA($version) {
$Env:CUDA_PATH = $Env:CUDA_PATH_V12_2
} elseif ($version -eq "12.3") {
$Env:CUDA_PATH = $Env:CUDA_PATH_V12_3
- $Env:CUPY_ACCELERATORS = ""
} elseif ($version -eq "12.4") {
$Env:CUDA_PATH = $Env:CUDA_PATH_V12_4
} elseif ($version -eq "12.5") {
Expand Down
2 changes: 1 addition & 1 deletion third_party/cccl
Submodule cccl updated 91 files
+2 −2 cub/cub/detail/launcher/cuda_runtime.cuh
+2 −2 cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
+3 −3 cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
+2 −2 cub/cub/device/dispatch/dispatch_for.cuh
+2 −2 cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh
+2 −2 cub/cub/device/dispatch/dispatch_histogram.cuh
+2 −2 cub/cub/device/dispatch/dispatch_merge.cuh
+3 −3 cub/cub/device/dispatch/dispatch_merge_sort.cuh
+8 −9 cub/cub/device/dispatch/dispatch_radix_sort.cuh
+1 −1 cub/cub/device/dispatch/dispatch_reduce.cuh
+2 −2 cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
+2 −2 cub/cub/device/dispatch/dispatch_rle.cuh
+2 −2 cub/cub/device/dispatch/dispatch_scan.cuh
+2 −2 cub/cub/device/dispatch/dispatch_scan_by_key.cuh
+40 −41 cub/cub/device/dispatch/dispatch_segmented_sort.cuh
+2 −2 cub/cub/device/dispatch/dispatch_select_if.cuh
+2 −2 cub/cub/device/dispatch/dispatch_three_way_partition.cuh
+3 −3 cub/cub/device/dispatch/dispatch_transform.cuh
+2 −2 cub/cub/device/dispatch/dispatch_unique_by_key.cuh
+313 −2 cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh
+277 −24 cub/cub/device/dispatch/tuning/tuning_select_if.cuh
+1 −1 cub/cub/util_cpp_dialect.cuh
+3 −1 cub/cub/util_device.cuh
+1 −1 cub/test/catch2_test_launch_wrapper.cu
+1 −1 cub/test/catch2_test_vsmem.cu
+1 −1 docs/cub/developer_overview.rst
+2 −4 docs/libcudacxx/ptx/instructions/generated/mbarrier_test_wait.rst
+2 −4 docs/libcudacxx/ptx/instructions/generated/mbarrier_test_wait_parity.rst
+4 −8 docs/libcudacxx/ptx/instructions/generated/mbarrier_try_wait.rst
+4 −8 docs/libcudacxx/ptx/instructions/generated/mbarrier_try_wait_parity.rst
+16 −18 libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h
+16 −18 libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h
+31 −38 libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h
+31 −38 libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h
+8 −4 libcudacxx/include/cuda/__ptx/ptx_helper_functions.h
+2 −1 libcudacxx/include/cuda/discard_memory
+2 −0 libcudacxx/include/cuda/pipeline
+8 −0 libcudacxx/include/cuda/std/__cccl/builtin.h
+8 −14 libcudacxx/include/cuda/std/__exception/cuda_error.h
+12 −9 libcudacxx/include/cuda/std/__expected/bad_expected_access.h
+20 −0 libcudacxx/include/cuda/std/__expected/expected.h
+18 −0 libcudacxx/include/cuda/std/__expected/expected_base.h
+7 −0 libcudacxx/include/cuda/std/__expected/unexpected.h
+0 −4 libcudacxx/include/cuda/std/__functional/identity.h
+1 −1 libcudacxx/include/cuda/std/__internal/cpp_dialect.h
+0 −26 libcudacxx/include/cuda/std/__mdspan/config.h
+1 −0 libcudacxx/include/cuda/std/__memory/construct_at.h
+0 −2 libcudacxx/include/cuda/std/__utility/exchange.h
+18 −1 libcudacxx/include/cuda/std/__utility/pair.h
+5 −3 libcudacxx/include/cuda/std/detail/__access_property
+9 −6 libcudacxx/include/cuda/std/detail/__annotated_ptr
+4 −0 libcudacxx/include/cuda/std/detail/__config
+25 −0 libcudacxx/include/cuda/std/detail/libcxx/include/optional
+2 −0 libcudacxx/include/cuda/std/detail/libcxx/include/tuple
+20 −0 libcudacxx/include/cuda/std/detail/libcxx/include/variant
+2 −3 libcudacxx/include/cuda/stream_ref
+21 −0 libcudacxx/include/nv/detail/__target_macros
+7 −2 libcudacxx/include/nv/target
+1 −0 libcudacxx/test/libcudacxx/CMakeLists.txt
+2 −3 libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_test_wait.h
+2 −3 libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_test_wait_parity.h
+6 −7 libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait.h
+6 −7 libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait_parity.h
+202 −0 libcudacxx/test/libcudacxx/cuda/utilities/expected/device_only_types.pass.cpp
+200 −0 libcudacxx/test/libcudacxx/cuda/utilities/expected/host_only_types.pass.cpp
+137 −0 libcudacxx/test/libcudacxx/cuda/utilities/optional/device_only_types.pass.cpp
+135 −0 libcudacxx/test/libcudacxx/cuda/utilities/optional/host_only_types.pass.cpp
+81 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/device_only_types.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/forward_as_tuple_interop.pass.cpp
+90 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/host_only_types.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/vector_types_get.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/vector_types_structured_bindings.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/vector_types_tuple_element.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/tuple/vector_types_tuple_size.pass.cpp
+84 −0 libcudacxx/test/libcudacxx/cuda/utilities/unexpected/device_only_types.pass.cpp
+86 −0 libcudacxx/test/libcudacxx/cuda/utilities/unexpected/host_only_types.pass.cpp
+93 −0 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/device_only_types.pass.cpp
+93 −0 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/host_only_types.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/interop/pair.assign.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/interop/pair.cons.pass.cpp
+0 −0 libcudacxx/test/libcudacxx/cuda/utilities/utility/pair/interop/pair.conv.pass.cpp
+123 −0 libcudacxx/test/libcudacxx/cuda/utilities/variant/device_only_types.pass.cpp
+131 −0 libcudacxx/test/libcudacxx/cuda/utilities/variant/host_only_types.pass.cpp
+2 −2 libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.mdspan.cons/ctad_c_array.pass.cpp
+1 −1 libcudacxx/test/libcudacxx/std/containers/views/mdspan/mdspan.mdspan.cons/ctad_const_c_array.pass.cpp
+14 −1 libcudacxx/test/public_headers_host_only/CMakeLists.txt
+156 −0 libcudacxx/test/support/host_device_types.h
+6 −0 libcudacxx/test/utils/nvidia/nvrtc/nvrtcc.cpp
+1 −1 thrust/thrust/detail/config/cpp_dialect.h
+3 −2 thrust/thrust/system/cuda/detail/core/agent_launcher.h
+5 −1 thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h

0 comments on commit fa47710

Please sign in to comment.