Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion c/parallel/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
cccl_get_c2h()

function(cccl_c_parallel_add_test target_name_var source)
string(REGEX REPLACE "test_([^.]*)" "cccl.c.parallel.test.\\1" target_name "${source}")
get_filename_component(target_name "${source}" NAME_WE)
string(REGEX REPLACE "test_([^.]*)" "cccl.c.parallel.test.\\1" target_name "${target_name}")
set(target_name_var ${target_name} PARENT_SCOPE)

add_executable(${target_name} "${source}")
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/test/algorithm_execution.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void AlgorithmExecute(std::optional<BuildCache>& cache, const std::optional<KeyT
constexpr int device_id = 0;
const auto& build_info = BuildInformation<device_id>::init();

BuildResultT build;
BuildResultT build{};

bool found = false;
const bool cache_and_key = bool(cache) && bool(lookup_key);
Expand Down
4 changes: 3 additions & 1 deletion c/parallel/test/test_histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
//
//===----------------------------------------------------------------------===//

#include <array>
#include <cstdint>
#include <vector>

#include <cuda_runtime.h>

Expand Down Expand Up @@ -206,7 +208,7 @@ C2H_TEST("DeviceHistogram::HistogramEven API usage", "[histogram][device]")
using counter_t = int;

int num_samples = 10;
std::vector<float> d_samples{2.2, 6.1, 7.1, 2.9, 3.5, 0.3, 2.9, 2.1, 6.1, 999.5};
std::vector<float> d_samples{2.2f, 6.1f, 7.1f, 2.9f, 3.5f, 0.3f, 2.9f, 2.1f, 6.1f, 999.5f};

int num_rows = 1;

Expand Down
14 changes: 7 additions & 7 deletions c/parallel/test/test_merge_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ struct DeviceMergeSort_SortPairs_Iterators_Fixture_Tag;
C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]")
{
using key_t = int;
using item_t = int;
using int_item_t = int;
const int num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));

operation_t op = make_operation("op", get_merge_sort_op(get_type_info<key_t>().type));
Expand All @@ -396,28 +396,28 @@ C2H_TEST("DeviceMergeSort::SortPairs works with input iterators", "[merge_sort]"
make_random_access_iterator<key_t>(iterator_kind::INPUT, "int", "item");

std::vector<key_t> input_keys = make_shuffled_sequence<key_t>(num_items);
std::vector<item_t> input_items(num_items);
std::vector<int_item_t> input_items(num_items);
std::transform(input_keys.begin(), input_keys.end(), input_items.begin(), [](key_t key) {
return static_cast<item_t>(key);
return static_cast<int_item_t>(key);
});

std::vector<key_t> expected_keys = input_keys;
std::vector<item_t> expected_items = input_items;
std::vector<key_t> expected_keys = input_keys;
std::vector<int_item_t> expected_items = input_items;

pointer_t<key_t> input_keys_ptr(input_keys);
input_keys_it.state.data = input_keys_ptr.ptr;
pointer_t<key_t> input_items_ptr(input_items);
input_items_it.state.data = input_items_ptr.ptr;

auto& build_cache = get_cache<DeviceMergeSort_SortPairs_Iterators_Fixture_Tag>();
const auto& test_key = make_key<key_t, item_t>();
const auto& test_key = make_key<key_t, int_item_t>();

merge_sort(input_keys_it, input_items_it, input_keys_ptr, input_items_ptr, num_items, op, build_cache, test_key);

std::sort(expected_keys.begin(), expected_keys.end());
std::sort(expected_items.begin(), expected_items.end());
REQUIRE(expected_keys == std::vector<key_t>(input_keys_ptr));
REQUIRE(expected_items == std::vector<item_t>(input_items_ptr));
REQUIRE(expected_items == std::vector<int_item_t>(input_items_ptr));
}

// These tests with output iterators are currently failing https://github.com/NVIDIA/cccl/issues/3722
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/test/test_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ C2H_TEST("DeviceRadixSort::SortPairs works", "[radix_sort]", test_params_tuple)
if (is_descending)
{
std::sort(expected_keys.begin(), expected_keys.end(), std::greater<KeyT>());
std::sort(expected_items.begin(), expected_items.end(), std::greater<KeyT>());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oof, how was that working on Linux?!

std::sort(expected_items.begin(), expected_items.end(), std::greater<ItemT>());
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/test/test_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ C2H_TEST("Reduce works with input iterators", "[reduce]")
reduce(input_it, output_it, num_items, op, init, build_cache, test_key);

const int output = output_it[0];
const int expected = init.value + num_items * (num_items - 1) / 2;
const int expected = init.value + static_cast<int>(num_items * (num_items - 1) / 2);
REQUIRE(output == expected);
}

Expand Down
36 changes: 21 additions & 15 deletions c/parallel/test/test_segmented_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ C2H_TEST("SegmentedReduce works with input iterators", "[segmented_reduce]")
auto inp_ = generate<int>(n_elems);
for (auto&& el : inp_)
{
host_input.push_back(el);
host_input.push_back(static_cast<ValueT>(el));
}
}
std::vector<ValueT> host_output(n_cols, 0);
Expand Down Expand Up @@ -604,27 +604,33 @@ C2H_TEST("SegmentedReduce works with input iterators", "[segmented_reduce]")
REQUIRE(host_actual == host_output);
}

struct SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag;
C2H_TEST_LIST("segmented_reduce can work with floating point types",
"[segmented_reduce]",
using fp_test_types = c2h::type_list<
#if _CCCL_HAS_NVFP16()
__half,
__half,
#endif
float,
double)
float,
double>;
struct SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag;
C2H_TEST("segmented_reduce can work with floating point types", "[segmented_reduce]", fp_test_types)
{
using T = c2h::get<0, TestType>;

constexpr std::size_t n_rows = 13;
constexpr std::size_t n_cols = 12;

constexpr std::size_t n_elems = n_rows * n_cols;
constexpr std::size_t row_size = n_cols;

const std::vector<int> int_input = generate<int>(n_elems);
const std::vector<TestType> input(int_input.begin(), int_input.end());
std::vector<TestType> output(n_rows, 0);
// Suppress harmless conversion warnings on MSVC
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_MSVC(4244)
const std::vector<T> input(int_input.begin(), int_input.end());
_CCCL_DIAG_POP
std::vector<T> output(n_rows, 0);

pointer_t<TestType> input_ptr(input); // copy from host to device
pointer_t<TestType> output_ptr(output); // copy from host to device
pointer_t<T> input_ptr(input); // copy from host to device
pointer_t<T> output_ptr(output); // copy from host to device

using SizeT = unsigned long long;
static constexpr std::string_view index_ty_name = "unsigned long long";
Expand Down Expand Up @@ -659,11 +665,11 @@ C2H_TEST_LIST("segmented_reduce can work with floating point types",
end_offset_it.state.linear_id = 1;
end_offset_it.state.row_size = row_size;

operation_t op = make_operation("op", get_reduce_op(get_type_info<TestType>().type));
value_t<TestType> init{0};
operation_t op = make_operation("op", get_reduce_op(get_type_info<T>().type));
value_t<T> init{0};

auto& build_cache = get_cache<SegmentedReduce_SumOverRows_FloatingPointTypes_Fixture_Tag>();
const auto& test_key = make_key<TestType>();
const auto& test_key = make_key<T>();

segmented_reduce(input_ptr, output_ptr, n_rows, start_offset_it, end_offset_it, op, init, build_cache, test_key);

Expand All @@ -675,7 +681,7 @@ C2H_TEST_LIST("segmented_reduce can work with floating point types",
std::size_t row_offset = i * row_size;
host_output_it[i] = std::reduce(host_input_it + row_offset, host_input_it + (row_offset + n_cols));
}
REQUIRE(output == std::vector<TestType>(output_ptr));
REQUIRE(output == std::vector<T>(output_ptr));
}

template <typename ValueT>
Expand Down
21 changes: 13 additions & 8 deletions c/parallel/test/test_three_way_partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,18 @@ template <typename T>
struct three_way_partition_result_t
{
three_way_partition_result_t() = delete;
explicit three_way_partition_result_t(int num_items)
explicit three_way_partition_result_t(std::size_t num_items)
: first_part(num_items)
, second_part(num_items)
, unselected(num_items)
{}
explicit three_way_partition_result_t(
std::vector<T> first, std::vector<T> second, std::vector<T> unselected, int n_first, int n_second, int n_unselected)
std::vector<T> first,
std::vector<T> second,
std::vector<T> unselected,
std::size_t n_first,
std::size_t n_second,
std::size_t n_unselected)
: first_part(std::move(first))
, second_part(std::move(second))
, unselected(std::move(unselected))
Expand All @@ -110,9 +115,9 @@ struct three_way_partition_result_t
std::vector<T> second_part;
std::vector<T> unselected;

int num_items_in_first_part{};
int num_items_in_second_part{};
int num_unselected_items{};
std::size_t num_items_in_first_part{};
std::size_t num_items_in_second_part{};
std::size_t num_unselected_items{};

bool operator==(const three_way_partition_result_t<T>& other) const
{
Expand Down Expand Up @@ -500,7 +505,7 @@ C2H_TEST("ThreeWayPartition works with iterators", "[three_way_partition]")
REQUIRE(first_part_output == std_result.first_part);
REQUIRE(second_part_output == std_result.second_part);
REQUIRE(unselected_output == std_result.unselected);
REQUIRE(num_selected[0] == std_result.num_items_in_first_part);
REQUIRE(num_selected[1] == std_result.num_items_in_second_part);
REQUIRE(static_cast<int>(num_items) - num_selected[0] - num_selected[1] == std_result.num_unselected_items);
REQUIRE(static_cast<std::size_t>(num_selected[0]) == std_result.num_items_in_first_part);
REQUIRE(static_cast<std::size_t>(num_selected[1]) == std_result.num_items_in_second_part);
REQUIRE(num_items - static_cast<std::size_t>(num_selected[0] + num_selected[1]) == std_result.num_unselected_items);
}
10 changes: 8 additions & 2 deletions c/parallel/test/test_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ C2H_TEST("Transform generates UBLKCP on SM90", "[transform][ublkcp]")
return;
}

cccl_device_transform_build_result_t build;
cccl_device_transform_build_result_t build{};
operation_t op = make_operation("op", get_unary_op(get_type_info<int>().type));
REQUIRE(
CUDA_SUCCESS
Expand Down Expand Up @@ -254,10 +254,12 @@ C2H_TEST("Transform works with integral types with well-known operations", "[tra
unary_transform(input_ptr, output_ptr, num_items, op, build_cache, test_key);

std::vector<T> expected(num_items, 0);
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_MSVC(4146) // unary minus on unsigned type
std::transform(input.begin(), input.end(), expected.begin(), [](const T& x) {
return -x;
});

_CCCL_DIAG_POP
if (num_items > 0)
{
REQUIRE(expected == std::vector<T>(output_ptr));
Expand Down Expand Up @@ -536,7 +538,11 @@ C2H_TEST("Transform works with floating point types", "[transform]", floating_po
const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 16)));
operation_t op = make_operation("op", get_unary_op(get_type_info<T>().type));
const std::vector<int> int_input = generate<int>(num_items);
// Suppress harmless conversion warnings on MSVC
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_MSVC(4244)
const std::vector<T> input(int_input.begin(), int_input.end());
_CCCL_DIAG_POP
const std::vector<T> output(num_items, 0);
pointer_t<T> input_ptr(input);
pointer_t<T> output_ptr(output);
Expand Down
4 changes: 4 additions & 0 deletions c/parallel/test/test_unique_by_key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,11 @@ C2H_TEST("DeviceSelect::UniqueByKey works with floating point types", "[unique_b

operation_t op = make_operation("op", get_unique_by_key_op(get_type_info<key_t>().type));
const std::vector<int> int_input = generate<int>(num_items);
// Suppress harmless conversion warnings on MSVC
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_MSVC(4244)
const std::vector<key_t> input_keys(int_input.begin(), int_input.end());
_CCCL_DIAG_POP
std::vector<item_t> input_values = generate<item_t>(num_items);

pointer_t<key_t> input_keys_it(input_keys);
Expand Down
16 changes: 12 additions & 4 deletions c/parallel/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <format>
#include <fstream>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -49,9 +50,9 @@ inline std::string inspect_sass(const void* cubin, size_t cubin_size)
temp_in_file.close();

std::string command = "nvdisasm -gi ";
command += temp_in_filename;
command += temp_in_filename.string();
command += " > ";
command += temp_out_filename;
command += temp_out_filename.string();

int exec_code = std::system(command.c_str());

Expand Down Expand Up @@ -114,12 +115,19 @@ inline std::string compile(const std::string& source)
// Builds a std::vector<T> holding `num_items` pseudo-random integers, each
// drawn uniformly from the closed range [1, 42].
//
// The std::mt19937 engine is seeded from std::random_device on every call, so
// the produced values are generally not reproducible from one call to the next.
//
// T is expected to be an integer type; an empty vector is returned when
// `num_items` is 0.
template <class T>
std::vector<T> generate(std::size_t num_items)
{
// Add support for 8-bit ints, otherwise MSVC fails with:
// error C2338: static_assert failed:
// 'invalid template argument for uniform_int_distribution:
// N4950 [rand.req.genl]/1.5 requires one of
// short, int, long, long long,
// unsigned short, unsigned int, unsigned long, or unsigned long long'
// 1-byte T is therefore sampled as `short`; every other T is sampled as itself.
using dist_type = std::conditional_t<sizeof(T) == 1, short, T>;
std::random_device rnd_device;
std::mt19937 mersenne_engine{rnd_device()}; // Generates random integers
std::uniform_int_distribution<T> dist{T{1}, T{42}};
std::uniform_int_distribution<dist_type> dist{dist_type{1}, dist_type{42}};
std::vector<T> vec(num_items);
std::generate(vec.begin(), vec.end(), [&]() {
return dist(mersenne_engine);
// The sampled range [1, 42] fits in any 8-bit type, so casting back to T keeps the value.
return static_cast<T>(dist(mersenne_engine));
});
return vec;
}
Expand Down
2 changes: 2 additions & 0 deletions c2h/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ target_include_directories(cccl.c2h PUBLIC "${C2H_SOURCE_DIR}/include")
target_link_libraries(cccl.c2h PUBLIC
CCCL::CCCL
Catch2::Catch2
cccl.compiler_interface_cpp17
)
cccl_configure_target(cccl.c2h DIALECT 17)

if (C2H_ENABLE_CURAND)
target_link_libraries(cccl.c2h PRIVATE CUDA::curand)
Expand Down
25 changes: 8 additions & 17 deletions c2h/generators_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,11 @@ VEC_SPECIALIZATION(int4);

VEC_SPECIALIZATION(long2);
VEC_SPECIALIZATION(long3);
_CCCL_SUPPRESS_DEPRECATED_PUSH
VEC_SPECIALIZATION(long4);
_CCCL_SUPPRESS_DEPRECATED_POP
# if _CCCL_CTK_AT_LEAST(13, 0)
VEC_SPECIALIZATION(long4_16a);
VEC_SPECIALIZATION(long4_32a);
# else
VEC_SPECIALIZATION(long4);
Comment on lines 85 to +90
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remark: This makes long4 no longer usable for test data generation in unit tests on CTK >= 13.0. I previously changed all uses of long4 for those versions to the _16a and _32a variants to avoid warnings, but wanted to revisit this.

On second thought, maybe we are fine with no longer testing with long4 in CTK >= 13.0. So this is not a blocker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, I think this is the cleanest way around the headache of trying to actually suppress these warnings. MSVC likes to still complain even though they were suppressed >.<

# endif // _CCCL_CTK_AT_LEAST(13, 0)

// VEC_SPECIALIZATION(ulong2);
Expand All @@ -97,22 +96,20 @@ VEC_SPECIALIZATION(long4_32a);

VEC_SPECIALIZATION(longlong2);
VEC_SPECIALIZATION(longlong3);
_CCCL_SUPPRESS_DEPRECATED_PUSH
VEC_SPECIALIZATION(longlong4);
_CCCL_SUPPRESS_DEPRECATED_POP
# if _CCCL_CTK_AT_LEAST(13, 0)
VEC_SPECIALIZATION(longlong4_16a);
VEC_SPECIALIZATION(longlong4_32a);
# else
VEC_SPECIALIZATION(longlong4);
# endif // _CCCL_CTK_AT_LEAST(13, 0)

VEC_SPECIALIZATION(ulonglong2);
// VEC_SPECIALIZATION(ulonglong3);
_CCCL_SUPPRESS_DEPRECATED_PUSH
VEC_SPECIALIZATION(ulonglong4);
_CCCL_SUPPRESS_DEPRECATED_POP
# if _CCCL_CTK_AT_LEAST(13, 0)
VEC_SPECIALIZATION(ulonglong4_16a);
VEC_SPECIALIZATION(ulonglong4_32a);
# else
VEC_SPECIALIZATION(ulonglong4);
# endif // _CCCL_CTK_AT_LEAST(13, 0)

VEC_SPECIALIZATION(float2);
Expand All @@ -121,12 +118,11 @@ VEC_SPECIALIZATION(float4);

VEC_SPECIALIZATION(double2);
VEC_SPECIALIZATION(double3);
_CCCL_SUPPRESS_DEPRECATED_PUSH
VEC_SPECIALIZATION(double4);
_CCCL_SUPPRESS_DEPRECATED_POP
# if _CCCL_CTK_AT_LEAST(13, 0)
VEC_SPECIALIZATION(double4_16a);
VEC_SPECIALIZATION(double4_32a);
# else
VEC_SPECIALIZATION(double4);
# endif // _CCCL_CTK_AT_LEAST(13, 0)

# if TEST_HALF_T()
Expand Down Expand Up @@ -167,8 +163,3 @@ VEC_GEN_MOD_SPECIALIZATION(ulonglong4, unsigned long long);
VEC_GEN_MOD_SPECIALIZATION(ushort4, unsigned short);
#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
} // namespace c2h::detail

// Suppress deprecation warnings for use of vector types in the `*cudafe1.stub.c` file
#if _CCCL_CTK_AT_LEAST(13, 0) && _CCCL_COMPILER(CLANG)
_CCCL_SUPPRESS_DEPRECATED_PUSH
#endif // _CCCL_CTK_AT_LEAST(13, 0) && _CCCL_COMPILER(CLANG)
Loading