From b6bcddd811245f4beada8feb2ceda53239a555eb Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 15 Oct 2025 18:30:37 +0000 Subject: [PATCH 01/23] Add MSVC jobs for C Parallel --- c/parallel/test/test_util.h | 4 ++-- c2h/include/c2h/catch2_test_helper.h | 5 +++-- c2h/include/c2h/checked_allocator.cuh | 31 +++++++++++++++++++++++---- ci/matrix.yaml | 20 +++++++++-------- ci/windows/build_cccl_c_parallel.ps1 | 28 ++++++++++++++++++++++++ ci/windows/test_cccl_c_parallel.ps1 | 31 +++++++++++++++++++++++++++ 6 files changed, 102 insertions(+), 17 deletions(-) create mode 100644 ci/windows/build_cccl_c_parallel.ps1 create mode 100644 ci/windows/test_cccl_c_parallel.ps1 diff --git a/c/parallel/test/test_util.h b/c/parallel/test/test_util.h index 1aefc72fc97..a9801c53c87 100644 --- a/c/parallel/test/test_util.h +++ b/c/parallel/test/test_util.h @@ -49,9 +49,9 @@ inline std::string inspect_sass(const void* cubin, size_t cubin_size) temp_in_file.close(); std::string command = "nvdisasm -gi "; - command += temp_in_filename; + command += temp_in_filename.string(); command += " > "; - command += temp_out_filename; + command += temp_out_filename.string(); int exec_code = std::system(command.c_str()); diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h index bff191a30fa..517673a8acc 100644 --- a/c2h/include/c2h/catch2_test_helper.h +++ b/c2h/include/c2h/catch2_test_helper.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -403,8 +404,8 @@ inline std::size_t get_override_seed_count() { // Setting this environment variable forces a fixed number of seeds to be generated, regardless of the requested // count. Set to 1 to reduce redundant, expensive testing when using sanitizers, etc. - static const char* override_str = std::getenv("C2H_SEED_COUNT_OVERRIDE"); - static const int override_seeds = override_str ? std::atoi(override_str) : 0; + static std::optional override_str = c2h::detail::get_env("C2H_SEED_COUNT_OVERRIDE"); + static const int override_seeds = override_str ? std::atoi(override_str->c_str()) : 0; return override_seeds; } diff --git a/c2h/include/c2h/checked_allocator.cuh b/c2h/include/c2h/checked_allocator.cuh index d9c30940e6b..cd9c0d24b85 100644 --- a/c2h/include/c2h/checked_allocator.cuh +++ b/c2h/include/c2h/checked_allocator.cuh @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include @@ -44,6 +46,27 @@ namespace c2h namespace detail { +inline std::optional get_env(const char* name) +{ +#ifdef _WIN32 + char* buf = nullptr; + std::size_t len = 0; + if (_dupenv_s(&buf, &len, name) || !buf) + { + return std::nullopt; + } + std::string val(buf); + free(buf); + return val; +#else + if (const char* v = std::getenv(name)) + { + return std::string(v); + } + return std::nullopt; +#endif +} + struct memory_info { std::size_t free{}; @@ -55,15 +78,15 @@ struct memory_info // will be limited to this number of bytes. inline std::size_t get_device_memory_limit() { - static const char* override_str = std::getenv("C2H_DEVICE_MEMORY_LIMIT"); - static std::size_t result = override_str ? static_cast(std::atoll(override_str)) : 0; + static std::optional override_str = get_env("C2H_DEVICE_MEMORY_LIMIT"); + static std::size_t result = override_str ? static_cast(std::atoll(override_str->c_str())) : 0; return result; } inline bool get_debug_checked_allocs() { - static const char* debug_checked_allocs = std::getenv("C2H_DEBUG_CHECKED_ALLOC_FAILURES"); - static bool result = debug_checked_allocs && (std::atoi(debug_checked_allocs) != 0); + static std::optional debug_checked_allocs = get_env("C2H_DEBUG_CHECKED_ALLOC_FAILURES"); + static bool result = debug_checked_allocs && (std::atoi(debug_checked_allocs->c_str()) != 0); return result; } diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 78851db1c6e..f493b582eda 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,6 +21,8 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['msvc'], gpu: ['rtx2080']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['msvc'], gpu: ['rtx2080', 'l4', 'h100']} pull_request: # Old CTK: Oldest/newest supported host compilers: @@ -56,9 +58,9 @@ workflows: - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - {jobs: ['verify_codegen'], project: 'libcudacxx'} - # c.parallel -- pinned to gcc13 to match python - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + # c.parallel -- pinned to gcc13 on Linux to match python + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} # c.experimental.stf-- pinned to gcc13 to match python - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} @@ -106,8 +108,8 @@ workflows: # stdpar - {project: 'stdpar', jobs: ['build'], std: 'max', ctk: 'nvhpc', cxx: 'nvhpc'} # Python + support - - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: 'rtx2080', sm: 'gpu'} - - {project: 'cccl_c_stf', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: 'rtx2080', sm: 'gpu'} + - {project: 'cccl_c_parallel', jobs: ['test'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: 'rtx2080', sm: 'gpu'} + - {project: 'cccl_c_stf', jobs: ['test'], ctk: '13.X', cxx: 'gcc13', gpu: 'rtx2080', sm: 'gpu'} - {project: 'python', jobs: ['test'], ctk: '13.X', py_version: '3.13', gpu: 'l4', cxx: ['gcc13', 'msvc']} # Packaging / install - {project: 'packaging', jobs: ['test'], ctk: '13.X', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} @@ -170,8 +172,8 @@ workflows: - {jobs: ['nvrtc'], project: 'libcudacxx', ctk: [ '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - {jobs: ['verify_codegen'], project: 'libcudacxx', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: 'gcc12'} # c.parallel -- pinned to gcc13 to match python - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} # c.experimental.stf -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} @@ -247,8 +249,8 @@ workflows: - {jobs: ['nvrtc'], project: 'libcudacxx', ctk: [ '12.X', '13.0', '13.X'], cxx: 'gcc12', std: 'all', gpu: 'rtx2080', sm: 'gpu'} - {jobs: ['verify_codegen'], project: 'libcudacxx', ctk: ['12.0', '12.X', '13.0', '13.X'], cxx: 'gcc12'} # c.parallel -- pinned to gcc13 to match python - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} + - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} # c.experimental.stf -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} diff --git a/ci/windows/build_cccl_c_parallel.ps1 b/ci/windows/build_cccl_c_parallel.ps1 new file mode 100644 index 00000000000..19a83a550fa --- /dev/null +++ b/ci/windows/build_cccl_c_parallel.ps1 @@ -0,0 +1,28 @@ +Param( + [Parameter(Mandatory = $false)] + [Alias("arch")] + [string]$CUDA_ARCH = "", + [Parameter(Mandatory = $false)] + [Alias("cmake-options")] + [string]$CMAKE_OPTIONS = "" +) + +$ErrorActionPreference = "Stop" + +$CURRENT_PATH = Split-Path $pwd -leaf +If($CURRENT_PATH -ne "ci") { + Write-Host "Moving to ci folder" + pushd "$PSScriptRoot/.." +} + +Remove-Module -Name build_common -ErrorAction SilentlyContinue +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList @(20, $CUDA_ARCH, $CMAKE_OPTIONS) + +$PRESET = "cccl-c-parallel" +$LOCAL_CMAKE_OPTIONS = "" + +configure_and_build_preset "CCCL C Parallel" $PRESET $LOCAL_CMAKE_OPTIONS + +If($CURRENT_PATH -ne "ci") { + popd +} diff --git a/ci/windows/test_cccl_c_parallel.ps1 b/ci/windows/test_cccl_c_parallel.ps1 new file mode 100644 index 00000000000..593a3bec13a --- /dev/null +++ b/ci/windows/test_cccl_c_parallel.ps1 @@ -0,0 +1,31 @@ +Param( + [Parameter(Mandatory = $false)] + [Alias("arch")] + [string]$CUDA_ARCH = "", + [Parameter(Mandatory = $false)] + [Alias("cmake-options")] + [string]$CMAKE_OPTIONS = "" +) + +$ErrorActionPreference = "Stop" + +$CURRENT_PATH = Split-Path $pwd -leaf +If($CURRENT_PATH -ne "ci") { + Write-Host "Moving to ci folder" + pushd "$PSScriptRoot/.." +} + +# Build first +$buildCmd = "$PSScriptRoot/build_cccl_c_parallel.ps1 -arch '$CUDA_ARCH' -cmake-options '$CMAKE_OPTIONS'" +Write-Host "Running: $buildCmd" +Invoke-Expression $buildCmd + +Remove-Module -Name build_common -ErrorAction SilentlyContinue +Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList @(20, $CUDA_ARCH, $CMAKE_OPTIONS) + +$PRESET = "cccl-c-parallel" +test_preset "CCCL C Parallel" "$PRESET" + +If($CURRENT_PATH -ne "ci") { + popd +} From 91c0a1750f2a746b26552b02c4e54e876c62b8d3 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 25 Oct 2025 22:13:04 +0000 Subject: [PATCH 02/23] Add missing header. --- c/parallel/test/test_util.h | 1 + 1 file changed, 1 insertion(+) diff --git a/c/parallel/test/test_util.h b/c/parallel/test/test_util.h index a9801c53c87..848237a2dd1 100644 --- a/c/parallel/test/test_util.h +++ b/c/parallel/test/test_util.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From e0d088c272b3d8a895353daff3116d5ef73b0e44 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 12:29:28 +0000 Subject: [PATCH 03/23] Initialize var. --- c/parallel/test/algorithm_execution.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/algorithm_execution.h b/c/parallel/test/algorithm_execution.h index 4cde9bde36b..97318ff6fa3 100644 --- a/c/parallel/test/algorithm_execution.h +++ b/c/parallel/test/algorithm_execution.h @@ -109,7 +109,7 @@ void AlgorithmExecute(std::optional& cache, const std::optional::init(); - BuildResultT build; + BuildResultT build{}; bool found = false; const bool cache_and_key = bool(cache) && bool(lookup_key); From 4ab63736177b9d0b3aef75f4a26002a87df4a45d Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 12:29:45 +0000 Subject: [PATCH 04/23] Add CCCL compiler flags to c2h --- c2h/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/c2h/CMakeLists.txt b/c2h/CMakeLists.txt index 23bb4bd9560..661398ca4fb 100644 --- a/c2h/CMakeLists.txt +++ b/c2h/CMakeLists.txt @@ -28,7 +28,9 @@ target_include_directories(cccl.c2h PUBLIC "${C2H_SOURCE_DIR}/include") target_link_libraries(cccl.c2h PUBLIC CCCL::CCCL Catch2::Catch2 + cccl.compiler_interface_cpp20 ) +cccl_configure_target(cccl.c2h DIALECT 20) if (C2H_ENABLE_CURAND) target_link_libraries(cccl.c2h PRIVATE CUDA::curand) From fc17be2bd4ac2bf980dbefcb2b26a7cea240f310 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 12:30:36 +0000 Subject: [PATCH 05/23] Replace vector type depr supp with CTK checks. --- c2h/generators_vector.cu | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/c2h/generators_vector.cu b/c2h/generators_vector.cu index c66b16b2786..939e52dcc42 100644 --- a/c2h/generators_vector.cu +++ b/c2h/generators_vector.cu @@ -83,12 +83,11 @@ VEC_SPECIALIZATION(int4); VEC_SPECIALIZATION(long2); VEC_SPECIALIZATION(long3); -_CCCL_SUPPRESS_DEPRECATED_PUSH -VEC_SPECIALIZATION(long4); -_CCCL_SUPPRESS_DEPRECATED_POP # if _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(long4_16a); VEC_SPECIALIZATION(long4_32a); +# else +VEC_SPECIALIZATION(long4); # endif // _CCCL_CTK_AT_LEAST(13, 0) // VEC_SPECIALIZATION(ulong2); @@ -97,22 +96,20 @@ VEC_SPECIALIZATION(long4_32a); VEC_SPECIALIZATION(longlong2); VEC_SPECIALIZATION(longlong3); -_CCCL_SUPPRESS_DEPRECATED_PUSH -VEC_SPECIALIZATION(longlong4); -_CCCL_SUPPRESS_DEPRECATED_POP # if _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(longlong4_16a); VEC_SPECIALIZATION(longlong4_32a); +# else +VEC_SPECIALIZATION(longlong4); # endif // _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(ulonglong2); // VEC_SPECIALIZATION(ulonglong3); -_CCCL_SUPPRESS_DEPRECATED_PUSH -VEC_SPECIALIZATION(ulonglong4); -_CCCL_SUPPRESS_DEPRECATED_POP # if _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(ulonglong4_16a); VEC_SPECIALIZATION(ulonglong4_32a); +# else +VEC_SPECIALIZATION(ulonglong4); # endif // _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(float2); @@ -121,12 +118,11 @@ VEC_SPECIALIZATION(float4); VEC_SPECIALIZATION(double2); VEC_SPECIALIZATION(double3); -_CCCL_SUPPRESS_DEPRECATED_PUSH -VEC_SPECIALIZATION(double4); -_CCCL_SUPPRESS_DEPRECATED_POP # if _CCCL_CTK_AT_LEAST(13, 0) VEC_SPECIALIZATION(double4_16a); VEC_SPECIALIZATION(double4_32a); +# else +VEC_SPECIALIZATION(double4); # endif // _CCCL_CTK_AT_LEAST(13, 0) # if TEST_HALF_T() @@ -167,8 +163,3 @@ VEC_GEN_MOD_SPECIALIZATION(ulonglong4, unsigned long long); VEC_GEN_MOD_SPECIALIZATION(ushort4, unsigned short); #endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA } // namespace c2h::detail - -// Suppress deprecation warnings for use of vector types in the `*cudafe1.stub.c` file -#if _CCCL_CTK_AT_LEAST(13, 0) && _CCCL_COMPILER(CLANG) -_CCCL_SUPPRESS_DEPRECATED_PUSH -#endif // _CCCL_CTK_AT_LEAST(13, 0) && _CCCL_COMPILER(CLANG) From 56bc38934e16f4aaf672e3b2d3652f574cc6dbe8 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 13:18:53 +0000 Subject: [PATCH 06/23] More missing headers. --- c/parallel/test/test_histogram.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/c/parallel/test/test_histogram.cpp b/c/parallel/test/test_histogram.cpp index d25f5dff4a9..1c05bb36ab8 100644 --- a/c/parallel/test/test_histogram.cpp +++ b/c/parallel/test/test_histogram.cpp @@ -8,7 +8,9 @@ // //===----------------------------------------------------------------------===// +#include #include +#include #include From b2ff961da75a992651ea40df0cd83710b938b687 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 09:42:33 -0400 Subject: [PATCH 07/23] Fix compare functor type in radix sort test. --- c/parallel/test/test_radix_sort.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_radix_sort.cpp b/c/parallel/test/test_radix_sort.cpp index b8ebc79116e..c2e67092003 100644 --- a/c/parallel/test/test_radix_sort.cpp +++ b/c/parallel/test/test_radix_sort.cpp @@ -314,7 +314,7 @@ C2H_TEST("DeviceRadixSort::SortPairs works", "[radix_sort]", test_params_tuple) if (is_descending) { std::sort(expected_keys.begin(), expected_keys.end(), std::greater()); - std::sort(expected_items.begin(), expected_items.end(), std::greater()); + std::sort(expected_items.begin(), expected_items.end(), std::greater()); } else { From 37ec5d583c7cd1a8de67a4c6b8d278358088561e Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:04:49 -0400 Subject: [PATCH 08/23] Move test types to type list. MSVC doesn't like preprocessor checks inside the test macros. --- c/parallel/test/test_segmented_reduce.cpp | 31 +++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/c/parallel/test/test_segmented_reduce.cpp b/c/parallel/test/test_segmented_reduce.cpp index 98b14d0130a..2bdaaae06d0 100644 --- a/c/parallel/test/test_segmented_reduce.cpp +++ b/c/parallel/test/test_segmented_reduce.cpp @@ -604,15 +604,18 @@ C2H_TEST("SegmentedReduce works with input iterators", "[segmented_reduce]") REQUIRE(host_actual == host_output); } -struct SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag; -C2H_TEST_LIST("segmented_reduce can work with floating point types", - "[segmented_reduce]", +using fp_test_types = c2h::type_list< #if _CCCL_HAS_NVFP16() - __half, +__half, #endif - float, - double) +float, double +>; +struct SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag; +C2H_TEST("segmented_reduce can work with floating point types", + "[segmented_reduce]", fp_test_types) { + using T = c2h::get<0, TestType>; + constexpr std::size_t n_rows = 13; constexpr std::size_t n_cols = 12; @@ -620,11 +623,11 @@ C2H_TEST_LIST("segmented_reduce can work with floating point types", constexpr std::size_t row_size = n_cols; const std::vector int_input = generate(n_elems); - const std::vector input(int_input.begin(), int_input.end()); - std::vector output(n_rows, 0); + const std::vector input(int_input.begin(), int_input.end()); + std::vector output(n_rows, 0); - pointer_t input_ptr(input); // copy from host to device - pointer_t output_ptr(output); // copy from host to device + pointer_t input_ptr(input); // copy from host to device + pointer_t output_ptr(output); // copy from host to device using SizeT = unsigned long long; static constexpr std::string_view index_ty_name = "unsigned long long"; @@ -659,11 +662,11 @@ C2H_TEST_LIST("segmented_reduce can work with floating point types", end_offset_it.state.linear_id = 1; end_offset_it.state.row_size = row_size; - operation_t op = make_operation("op", get_reduce_op(get_type_info().type)); - value_t init{0}; + operation_t op = make_operation("op", get_reduce_op(get_type_info().type)); + value_t init{0}; auto& build_cache = get_cache(); - const auto& test_key = make_key(); + const auto& test_key = make_key(); segmented_reduce(input_ptr, output_ptr, n_rows, start_offset_it, end_offset_it, op, init, build_cache, test_key); @@ -675,7 +678,7 @@ C2H_TEST_LIST("segmented_reduce can work with floating point types", std::size_t row_offset = i * row_size; host_output_it[i] = std::reduce(host_input_it + row_offset, host_input_it + (row_offset + n_cols)); } - REQUIRE(output == std::vector(output_ptr)); + REQUIRE(output == std::vector(output_ptr)); } template From dbf681c42fd8ee179c5fa58e0d26e2628c6ee07f Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:25:17 -0400 Subject: [PATCH 09/23] Add support for 8-bit ints in generate helper. --- c/parallel/test/test_util.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/c/parallel/test/test_util.h b/c/parallel/test/test_util.h index 848237a2dd1..7033e38d0b0 100644 --- a/c/parallel/test/test_util.h +++ b/c/parallel/test/test_util.h @@ -115,12 +115,19 @@ inline std::string compile(const std::string& source) template std::vector generate(std::size_t num_items) { + // Add support for 8-bit ints, otherwise MSVC fails with: + // error C2338: static_assert failed: + // 'invalid template argument for uniform_int_distribution: + // N4950 [rand.req.genl]/1.5 requires one of + // short, int, long, long long, + // unsigned short, unsigned int, unsigned long, or unsigned long long' + using dist_type = std::conditional_t; std::random_device rnd_device; std::mt19937 mersenne_engine{rnd_device()}; // Generates random integers - std::uniform_int_distribution dist{T{1}, T{42}}; + std::uniform_int_distribution dist{dist_type{1}, dist_type{42}}; std::vector vec(num_items); std::generate(vec.begin(), vec.end(), [&]() { - return dist(mersenne_engine); + return static_cast(dist(mersenne_engine)); }); return vec; } From 35d16da19e931988b0dc675fa766b22ce758c529 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:28:57 -0400 Subject: [PATCH 10/23] Suppress harmless type conversion warnings on MSVC. --- c/parallel/test/test_segmented_reduce.cpp | 4 ++++ c/parallel/test/test_transform.cpp | 4 ++++ c/parallel/test/test_unique_by_key.cpp | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/c/parallel/test/test_segmented_reduce.cpp b/c/parallel/test/test_segmented_reduce.cpp index 2bdaaae06d0..e5a5071252e 100644 --- a/c/parallel/test/test_segmented_reduce.cpp +++ b/c/parallel/test/test_segmented_reduce.cpp @@ -623,7 +623,11 @@ C2H_TEST("segmented_reduce can work with floating point types", constexpr std::size_t row_size = n_cols; const std::vector int_input = generate(n_elems); + // Suppress harmless conversion warnings on MSVC + _CCCL_DIAG_PUSH + _CCCL_DIAG_SUPPRESS_MSVC(4244) const std::vector input(int_input.begin(), int_input.end()); + _CCCL_DIAG_POP std::vector output(n_rows, 0); pointer_t input_ptr(input); // copy from host to device diff --git a/c/parallel/test/test_transform.cpp b/c/parallel/test/test_transform.cpp index 1824e589fda..c803d67f13b 100644 --- a/c/parallel/test/test_transform.cpp +++ b/c/parallel/test/test_transform.cpp @@ -536,7 +536,11 @@ C2H_TEST("Transform works with floating point types", "[transform]", floating_po const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 16))); operation_t op = make_operation("op", get_unary_op(get_type_info().type)); const std::vector int_input = generate(num_items); + // Suppress harmless conversion warnings on MSVC + _CCCL_DIAG_PUSH + _CCCL_DIAG_SUPPRESS_MSVC(4244) const std::vector input(int_input.begin(), int_input.end()); + _CCCL_DIAG_POP const std::vector output(num_items, 0); pointer_t input_ptr(input); pointer_t output_ptr(output); diff --git a/c/parallel/test/test_unique_by_key.cpp b/c/parallel/test/test_unique_by_key.cpp index 6fde9f24018..b89a5042957 100644 --- a/c/parallel/test/test_unique_by_key.cpp +++ b/c/parallel/test/test_unique_by_key.cpp @@ -218,7 +218,11 @@ C2H_TEST("DeviceSelect::UniqueByKey works with floating point types", "[unique_b operation_t op = make_operation("op", get_unique_by_key_op(get_type_info().type)); const std::vector int_input = generate(num_items); + // Suppress harmless conversion warnings on MSVC + _CCCL_DIAG_PUSH + _CCCL_DIAG_SUPPRESS_MSVC(4244) const std::vector input_keys(int_input.begin(), int_input.end()); + _CCCL_DIAG_POP std::vector input_values = generate(num_items); pointer_t input_keys_it(input_keys); From 26fea2e6fe0a38973fac4af2c1865b3c43200118 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:40:48 -0400 Subject: [PATCH 11/23] Fix overeager maybe-uninitialized warning. --- c/parallel/test/test_transform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_transform.cpp b/c/parallel/test/test_transform.cpp index c803d67f13b..8fa9ad7cf9a 100644 --- a/c/parallel/test/test_transform.cpp +++ b/c/parallel/test/test_transform.cpp @@ -109,7 +109,7 @@ C2H_TEST("Transform generates UBLKCP on SM90", "[transform][ublkcp]") return; } - cccl_device_transform_build_result_t build; + cccl_device_transform_build_result_t build{}; operation_t op = make_operation("op", get_unary_op(get_type_info().type)); REQUIRE( CUDA_SUCCESS From 80526b683424ece0f94419b743c9c44786ac2a03 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 14:41:24 +0000 Subject: [PATCH 12/23] Formatting. --- c/parallel/test/test_segmented_reduce.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/c/parallel/test/test_segmented_reduce.cpp b/c/parallel/test/test_segmented_reduce.cpp index e5a5071252e..6cb13a20caf 100644 --- a/c/parallel/test/test_segmented_reduce.cpp +++ b/c/parallel/test/test_segmented_reduce.cpp @@ -606,13 +606,12 @@ C2H_TEST("SegmentedReduce works with input iterators", "[segmented_reduce]") using fp_test_types = c2h::type_list< #if _CCCL_HAS_NVFP16() -__half, + __half, #endif -float, double ->; + float, + double>; struct SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag; -C2H_TEST("segmented_reduce can work with floating point types", - "[segmented_reduce]", fp_test_types) +C2H_TEST("segmented_reduce can work with floating point types", "[segmented_reduce]", fp_test_types) { using T = c2h::get<0, TestType>; From 5fea47a6f45d3a96919083ff3ab6e9a2e60e2c80 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:58:33 -0400 Subject: [PATCH 13/23] Fix type conversion warnings. --- c/parallel/test/test_histogram.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_histogram.cpp b/c/parallel/test/test_histogram.cpp index 1c05bb36ab8..3df1ad6effd 100644 --- a/c/parallel/test/test_histogram.cpp +++ b/c/parallel/test/test_histogram.cpp @@ -208,7 +208,7 @@ C2H_TEST("DeviceHistogram::HistogramEven API usage", "[histogram][device]") using counter_t = int; int num_samples = 10; - std::vector d_samples{2.2, 6.1, 7.1, 2.9, 3.5, 0.3, 2.9, 2.1, 6.1, 999.5}; + std::vector d_samples{2.2f, 6.1f, 7.1f, 2.9f, 3.5f, 0.3f, 2.9f, 2.1f, 6.1f, 999.5f}; int num_rows = 1; From 00de8f8b26138d1e42b2ba6611450a614a72d05f Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 10:58:48 -0400 Subject: [PATCH 14/23] Fix shadowed alias identifier. --- c/parallel/test/test_merge_sort.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/c/parallel/test/test_merge_sort.cpp b/c/parallel/test/test_merge_sort.cpp index d5f76870779..628d256f997 100644 --- a/c/parallel/test/test_merge_sort.cpp +++ b/c/parallel/test/test_merge_sort.cpp @@ -386,7 +386,7 @@ struct DeviceMergeSort_SortPairs_Iterators_Fixture_Tag; C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]") { using key_t = int; - using item_t = int; + using int_item_t = int; const int num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); operation_t op = make_operation("op", get_merge_sort_op(get_type_info().type)); @@ -396,13 +396,13 @@ C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]" make_random_access_iterator(iterator_kind::INPUT, "int", "item"); std::vector input_keys = make_shuffled_sequence(num_items); - std::vector input_items(num_items); + std::vector input_items(num_items); std::transform(input_keys.begin(), input_keys.end(), input_items.begin(), [](key_t key) { - return static_cast(key); + return static_cast(key); }); std::vector expected_keys = input_keys; - std::vector expected_items = input_items; + std::vector expected_items = input_items; pointer_t input_keys_ptr(input_keys); input_keys_it.state.data = input_keys_ptr.ptr; @@ -410,14 +410,14 @@ C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]" input_items_it.state.data = input_items_ptr.ptr; auto& build_cache = get_cache(); - const auto& test_key = make_key(); + const auto& test_key = make_key(); merge_sort(input_keys_it, input_items_it, input_keys_ptr, input_items_ptr, num_items, op, build_cache, test_key); std::sort(expected_keys.begin(), expected_keys.end()); std::sort(expected_items.begin(), expected_items.end()); REQUIRE(expected_keys == std::vector(input_keys_ptr)); - REQUIRE(expected_items == std::vector(input_items_ptr)); + REQUIRE(expected_items == std::vector(input_items_ptr)); } // These tests with output iterators are currently failing https://github.com/NVIDIA/cccl/issues/3722 From d256cb5a07236e995db1c9b7b6c2ca2f4de4b407 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 14:59:51 +0000 Subject: [PATCH 15/23] Formatting. --- c/parallel/test/test_merge_sort.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_merge_sort.cpp b/c/parallel/test/test_merge_sort.cpp index 628d256f997..fc87a6ee98e 100644 --- a/c/parallel/test/test_merge_sort.cpp +++ b/c/parallel/test/test_merge_sort.cpp @@ -401,7 +401,7 @@ C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]" return static_cast(key); }); - std::vector expected_keys = input_keys; + std::vector expected_keys = input_keys; std::vector expected_items = input_items; pointer_t input_keys_ptr(input_keys); From 6ca186da3cfd03568ba66d93243c6f5553475c0c Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 12:19:34 -0400 Subject: [PATCH 16/23] Fix conversion warning. --- c/parallel/test/test_reduce.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_reduce.cpp b/c/parallel/test/test_reduce.cpp index 32e2c321e91..adbb00d7395 100644 --- a/c/parallel/test/test_reduce.cpp +++ b/c/parallel/test/test_reduce.cpp @@ -269,7 +269,7 @@ C2H_TEST("Reduce works with input iterators", "[reduce]") reduce(input_it, output_it, num_items, op, init, build_cache, test_key); const int output = output_it[0]; - const int expected = init.value + num_items * (num_items - 1) / 2; + const int expected = init.value + static_cast(num_items * (num_items - 1) / 2); REQUIRE(output == expected); } From afda68f3da99098f83fab777de6f62465a5ae639 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 12:24:50 -0400 Subject: [PATCH 17/23] Fix conversion warnings. --- c/parallel/test/test_segmented_reduce.cpp | 2 +- c/parallel/test/test_three_way_partition.cpp | 10 +++++----- c/parallel/test/test_transform.cpp | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/c/parallel/test/test_segmented_reduce.cpp b/c/parallel/test/test_segmented_reduce.cpp index 6cb13a20caf..219a6c450c6 100644 --- a/c/parallel/test/test_segmented_reduce.cpp +++ b/c/parallel/test/test_segmented_reduce.cpp @@ -526,7 +526,7 @@ C2H_TEST("SegmentedReduce works with input iterators", "[segmented_reduce]") auto inp_ = generate(n_elems); for (auto&& el : inp_) { - host_input.push_back(el); + host_input.push_back(static_cast(el)); } } std::vector host_output(n_cols, 0); diff --git a/c/parallel/test/test_three_way_partition.cpp b/c/parallel/test/test_three_way_partition.cpp index 8b1dc1dbddd..e393c72fc89 100644 --- a/c/parallel/test/test_three_way_partition.cpp +++ b/c/parallel/test/test_three_way_partition.cpp @@ -91,13 +91,13 @@ template struct three_way_partition_result_t { three_way_partition_result_t() = delete; - explicit three_way_partition_result_t(int num_items) + explicit three_way_partition_result_t(std::size_t num_items) : first_part(num_items) , second_part(num_items) , unselected(num_items) {} explicit three_way_partition_result_t( - std::vector first, std::vector second, std::vector unselected, int n_first, int n_second, int n_unselected) + std::vector first, std::vector second, std::vector unselected, std::size_t n_first, std::size_t n_second, std::size_t n_unselected) : first_part(std::move(first)) , second_part(std::move(second)) , unselected(std::move(unselected)) @@ -110,9 +110,9 @@ struct three_way_partition_result_t std::vector second_part; std::vector unselected; - int num_items_in_first_part{}; - int num_items_in_second_part{}; - int num_unselected_items{}; + std::size_t num_items_in_first_part{}; + std::size_t num_items_in_second_part{}; + std::size_t num_unselected_items{}; bool operator==(const three_way_partition_result_t& other) const { diff --git a/c/parallel/test/test_transform.cpp b/c/parallel/test/test_transform.cpp index 8fa9ad7cf9a..2379e432189 100644 --- a/c/parallel/test/test_transform.cpp +++ b/c/parallel/test/test_transform.cpp @@ -254,10 +254,12 @@ C2H_TEST("Transform works with integral types with well-known operations", "[tra unary_transform(input_ptr, output_ptr, num_items, op, build_cache, test_key); std::vector expected(num_items, 0); + _CCCL_DIAG_PUSH + _CCCL_DIAG_SUPPRESS_MSVC(4146) // unary minus on unsigned type std::transform(input.begin(), input.end(), expected.begin(), [](const T& x) { return -x; }); - + _CCCL_DIAG_POP if (num_items > 0) { REQUIRE(expected == std::vector(output_ptr)); From a68ee38ab01142193df196b0dc069616ec7f8d06 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 16:35:12 +0000 Subject: [PATCH 18/23] Formatting. --- c/parallel/test/test_three_way_partition.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/c/parallel/test/test_three_way_partition.cpp b/c/parallel/test/test_three_way_partition.cpp index e393c72fc89..0df644d2ec1 100644 --- a/c/parallel/test/test_three_way_partition.cpp +++ b/c/parallel/test/test_three_way_partition.cpp @@ -97,7 +97,12 @@ struct three_way_partition_result_t , unselected(num_items) {} explicit three_way_partition_result_t( - std::vector first, std::vector second, std::vector unselected, std::size_t n_first, std::size_t n_second, std::size_t n_unselected) + std::vector first, + std::vector second, + std::vector unselected, + std::size_t n_first, + std::size_t n_second, + std::size_t n_unselected) : first_part(std::move(first)) , second_part(std::move(second)) , unselected(std::move(unselected)) From 4f9c16198030c404f59753cb413eba5e1a01d8dd Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 18:03:26 +0000 Subject: [PATCH 19/23] Remove override matrix. --- ci/matrix.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index f493b582eda..3889faf2bc3 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -21,8 +21,6 @@ workflows: # args: '--preset libcudacxx-cpp20 --lit-tests "cuda/utility/basic_any.pass.cpp"' } # override: - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['msvc'], gpu: ['rtx2080']} - - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['msvc'], gpu: ['rtx2080', 'l4', 'h100']} pull_request: # Old CTK: Oldest/newest supported host compilers: From 7827866707e7627e013ddebbc9d31c860afaf073 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 19:19:45 +0000 Subject: [PATCH 20/23] More conversion warnings. --- c/parallel/test/test_three_way_partition.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c/parallel/test/test_three_way_partition.cpp b/c/parallel/test/test_three_way_partition.cpp index 0df644d2ec1..67b14c116e9 100644 --- a/c/parallel/test/test_three_way_partition.cpp +++ b/c/parallel/test/test_three_way_partition.cpp @@ -505,7 +505,7 @@ C2H_TEST("ThreeWayPartition works with iterators", "[three_way_partition]") REQUIRE(first_part_output == std_result.first_part); REQUIRE(second_part_output == std_result.second_part); REQUIRE(unselected_output == std_result.unselected); - REQUIRE(num_selected[0] == std_result.num_items_in_first_part); - REQUIRE(num_selected[1] == std_result.num_items_in_second_part); - REQUIRE(static_cast(num_items) - num_selected[0] - num_selected[1] == std_result.num_unselected_items); + REQUIRE(static_cast(num_selected[0]) == std_result.num_items_in_first_part); + REQUIRE(static_cast(num_selected[1]) == std_result.num_items_in_second_part); + REQUIRE(num_items - static_cast(num_selected[0] - num_selected[1]) == std_result.num_unselected_items); } From 3254d2b9e2a956a0ab92833fdc23e6c1b63d3243 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 19:20:05 +0000 Subject: [PATCH 21/23] Switch C2H to C++17 to support gcc<10. --- c2h/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c2h/CMakeLists.txt b/c2h/CMakeLists.txt index 661398ca4fb..dc60457ed25 100644 --- a/c2h/CMakeLists.txt +++ b/c2h/CMakeLists.txt @@ -28,9 +28,9 @@ target_include_directories(cccl.c2h PUBLIC "${C2H_SOURCE_DIR}/include") target_link_libraries(cccl.c2h PUBLIC CCCL::CCCL Catch2::Catch2 - cccl.compiler_interface_cpp20 + cccl.compiler_interface_cpp17 ) -cccl_configure_target(cccl.c2h DIALECT 20) +cccl_configure_target(cccl.c2h DIALECT 17) if (C2H_ENABLE_CURAND) target_link_libraries(cccl.c2h PRIVATE CUDA::curand) From 4febfcf3651b837c035976530bd651b207389469 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 20:01:10 +0000 Subject: [PATCH 22/23] Fix logic error in refactored cast. --- c/parallel/test/test_three_way_partition.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/test/test_three_way_partition.cpp b/c/parallel/test/test_three_way_partition.cpp index 67b14c116e9..2bf9c65c2c8 100644 --- a/c/parallel/test/test_three_way_partition.cpp +++ b/c/parallel/test/test_three_way_partition.cpp @@ -507,5 +507,5 @@ C2H_TEST("ThreeWayPartition works with iterators", "[three_way_partition]") REQUIRE(unselected_output == std_result.unselected); REQUIRE(static_cast(num_selected[0]) == std_result.num_items_in_first_part); REQUIRE(static_cast(num_selected[1]) == std_result.num_items_in_second_part); - REQUIRE(num_items - static_cast(num_selected[0] - num_selected[1]) == std_result.num_unselected_items); + REQUIRE(num_items - static_cast(num_selected[0] + num_selected[1]) == std_result.num_unselected_items); } From bc9bb737d0bc1c494245c257d6faada099a2ace3 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sun, 26 Oct 2025 20:01:37 +0000 Subject: [PATCH 23/23] Drop the filename extension from the c parallel test targets. --- c/parallel/test/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/c/parallel/test/CMakeLists.txt b/c/parallel/test/CMakeLists.txt index d81d2527e90..1de68f0c5cc 100644 --- a/c/parallel/test/CMakeLists.txt +++ b/c/parallel/test/CMakeLists.txt @@ -1,7 +1,8 @@ cccl_get_c2h() function(cccl_c_parallel_add_test target_name_var source) - string(REGEX REPLACE "test_([^.]*)" "cccl.c.parallel.test.\\1" target_name "${source}") + get_filename_component(target_name "${source}" NAME_WE) + string(REGEX REPLACE "test_([^.]*)" "cccl.c.parallel.test.\\1" target_name "${target_name}") set(target_name_var ${target_name} PARENT_SCOPE) add_executable(${target_name} "${source}")