From 0b4caafdc2df4cd4715de61fc5fb0e1f9c81abbe Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Wed, 26 Jul 2023 08:46:57 +0800 Subject: [PATCH 001/136] Init version 23.10.0-SNAPSHOT (#1283) Signed-off-by: Tim Liu --- .gitmodules | 2 +- CONTRIBUTING.md | 2 +- pom.xml | 2 +- src/main/cpp/CMakeLists.txt | 2 +- thirdparty/cudf | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index 3c32c9d1a2..88fbcc0ffc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "thirdparty/cudf"] path = thirdparty/cudf url = https://github.com/rapidsai/cudf.git - branch = branch-23.08 + branch = branch-23.10 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a0846edbc1..817a9b9c07 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,7 +148,7 @@ $ ./build/build-in-docker install ... ``` Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from -[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.08/CONTRIBUTING.md#building-from-source). +[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.10/CONTRIBUTING.md#building-from-source). 
```bash $ ./build/buildall diff --git a/pom.xml b/pom.xml index db0ab6832c..507c191cdc 100644 --- a/pom.xml +++ b/pom.xml @@ -21,7 +21,7 @@ com.nvidia spark-rapids-jni - 23.08.0-SNAPSHOT + 23.10.0-SNAPSHOT jar RAPIDS Accelerator JNI for Apache Spark diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 5f363fb6a8..c60c167019 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -32,7 +32,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI) project( SPARK_RAPIDS_JNI - VERSION 23.08.00 + VERSION 23.10.00 LANGUAGES C CXX CUDA ) diff --git a/thirdparty/cudf b/thirdparty/cudf index e0fa34b695..2a590dbb6a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e0fa34b6953490dbf15576884cdd3fbbfac87b3c +Subproject commit 2a590dbb6a06eb59bdfa97976dd5b22635b6c1f9 From 73bce8140cca8696506977cbd168838ad974fe7f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 11:14:08 +0800 Subject: [PATCH 002/136] Update submodule cudf to ee7e39b86fafb8fcdc842fa08ed1d414d7974929 (#1284) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2a590dbb6a..ee7e39b86f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2a590dbb6a06eb59bdfa97976dd5b22635b6c1f9 +Subproject commit ee7e39b86fafb8fcdc842fa08ed1d414d7974929 From d5beeb66e8f10d820fa7d47babb9de5eb81b390c Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 03:47:35 +0000 Subject: [PATCH 003/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 
67e81aef65..ee7e39b86f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 67e81aef654b782daa4751835577bdeb55e06168 +Subproject commit ee7e39b86fafb8fcdc842fa08ed1d414d7974929 From 54f44f14a30876392edbb67ce43b6c04d4b4d8c1 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:03:57 +0800 Subject: [PATCH 004/136] Update submodule cudf to 7dcf0525f8c0f7465256b489f715fd35eab188ad (#1287) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ee7e39b86f..7dcf0525f8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ee7e39b86fafb8fcdc842fa08ed1d414d7974929 +Subproject commit 7dcf0525f8c0f7465256b489f715fd35eab188ad From e2d49e8d5d8db516fc69e559f544c91428e28ea5 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 21:03:49 +0800 Subject: [PATCH 005/136] Update submodule cudf to e55f944124cc6cc2b847325fb448c2d9155ad188 (#1288) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7dcf0525f8..e55f944124 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7dcf0525f8c0f7465256b489f715fd35eab188ad +Subproject commit e55f944124cc6cc2b847325fb448c2d9155ad188 From 66d930bdebd49a31e0eae1edf4621cc03c7418ff Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 23:03:25 +0800 Subject: [PATCH 006/136] Update submodule cudf to 427f8792e04662afeccad3beaae593817c52079f (#1289) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e55f944124..427f8792e0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e55f944124cc6cc2b847325fb448c2d9155ad188 +Subproject commit 427f8792e04662afeccad3beaae593817c52079f From 75f1c619cb4d71b6a9265e0d0b17793919342a33 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:33:30 +0000 Subject: [PATCH 007/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f8e5a89e98..427f8792e0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f8e5a89e983065e1202f1151dd499bea3102a537 +Subproject commit 427f8792e04662afeccad3beaae593817c52079f From eee58e8bcd6303c233e2f9e79754e17a92276979 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 26 Jul 2023 22:33:31 +0000 Subject: [PATCH 008/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2231b15c9f..427f8792e0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2231b15c9f256c4885dc28e27e2767b2861290d6 +Subproject commit 427f8792e04662afeccad3beaae593817c52079f From 52a06ddcbd7b814b09ef546035de1be8c5452577 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 27 Jul 2023 02:34:57 +0000 Subject: [PATCH 009/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index fa09cca6d9..427f8792e0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit fa09cca6d9fb799f07cb1205d5bee2896ad594e3 +Subproject commit 427f8792e04662afeccad3beaae593817c52079f From 74de7b7b1c87ca5702ab9cff7e993c409c5ed522 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 27 Jul 2023 11:03:27 +0800 Subject: [PATCH 010/136] Update submodule cudf to 9aa2968cb89ff4d9b199ca955b47fa04f0973f8f (#1296) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 427f8792e0..9aa2968cb8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 427f8792e04662afeccad3beaae593817c52079f +Subproject commit 9aa2968cb89ff4d9b199ca955b47fa04f0973f8f From 5955c1f26851c89a1bd0cd82008cee34053b4a71 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 27 Jul 2023 04:33:34 +0000 Subject: [PATCH 011/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index abb59c8312..9aa2968cb8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit abb59c83128f956c7edcb4d7744cb0faecf0026c +Subproject commit 9aa2968cb89ff4d9b199ca955b47fa04f0973f8f From 3947c9c36635fda86079d9b86528b005ecbb6e10 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 27 Jul 2023 21:52:11 +0000 Subject: [PATCH 012/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 5600f19894..9aa2968cb8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 5600f1989495c9545011228ffc7fcd737e2a39bc +Subproject commit 9aa2968cb89ff4d9b199ca955b47fa04f0973f8f From 71fe8f5460b8ffb0222851ef801143049f818ad8 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 28 Jul 2023 20:17:51 +0800 Subject: [PATCH 013/136] Update submodule cudf to 80641708d7dff778263d738445ddca07d2bce19e (#1301) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9aa2968cb8..80641708d7 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9aa2968cb89ff4d9b199ca955b47fa04f0973f8f +Subproject commit 80641708d7dff778263d738445ddca07d2bce19e From 4b2c3bb91d83837b8b4a5c88798304bde01035f7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 29 Jul 2023 05:03:07 +0800 Subject: [PATCH 014/136] Update submodule cudf to f00e92220a337ad3af8c01c8c9e96f3c80e4f47e (#1302) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 80641708d7..f00e92220a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 80641708d7dff778263d738445ddca07d2bce19e +Subproject commit f00e92220a337ad3af8c01c8c9e96f3c80e4f47e From a598a3e0b56638629870eb9c60ae682557516e8c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 29 Jul 2023 11:02:44 +0800 Subject: [PATCH 015/136] Update submodule cudf to 7746af421c88c59e2261d5edfcfe61b0fc7835fd (#1304) Signed-off-by: spark-rapids automation 
<70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f00e92220a..7746af421c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f00e92220a337ad3af8c01c8c9e96f3c80e4f47e +Subproject commit 7746af421c88c59e2261d5edfcfe61b0fc7835fd From d6139c977a1175446f6a0d71efa8821f123b4afd Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Aug 2023 01:02:18 +0000 Subject: [PATCH 016/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 5600f19894..7746af421c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 5600f1989495c9545011228ffc7fcd737e2a39bc +Subproject commit 7746af421c88c59e2261d5edfcfe61b0fc7835fd From ebae3dd44158daab491c823ed33003c2e6aa4245 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 3 Aug 2023 00:35:30 +0800 Subject: [PATCH 017/136] [submodule-sync] bot-submodule-sync-branch-23.10 to branch-23.10 [skip ci] [bot] (#1305) * Update submodule cudf to 9d465f258ffdfe88e7b661cce8cae59741113ae0 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to 02357b17ff3b2cc3e8840281e7a9b0ed159cdcf0 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to 1a75b3c4301107bd874e09dcf33a24cfb288c7f9 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to 97d60c42baf9f4ba02cc95ecc3f5782030d65757 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to fe307c13fc7456b0d5edfd23ab28d323a8b7fb43 
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to 5e8fd8e142991b2a9abdd2d11c1a1e957996597d Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to d5265306afdf8d78f37b223ee2c6d9fbac874484 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7746af421c..d5265306af 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7746af421c88c59e2261d5edfcfe61b0fc7835fd +Subproject commit d5265306afdf8d78f37b223ee2c6d9fbac874484 From 305de874b9d3879c7ba1bcf5cbf34492bfd74156 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Aug 2023 22:34:03 +0000 Subject: [PATCH 018/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 017beb0be1..d5265306af 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 017beb0be18c917c70f60c947e48d29a6e6f96b7 +Subproject commit d5265306afdf8d78f37b223ee2c6d9fbac874484 From 38ca80a48c0bb0a685fd48e1c6173df34e25e144 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 3 Aug 2023 11:05:31 +0800 Subject: [PATCH 019/136] Update submodule cudf to 11fd25c25e8d593ddfb4e0d29281aaf63898d9ba (#1311) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d5265306af..11fd25c25e 160000 --- a/thirdparty/cudf 
+++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d5265306afdf8d78f37b223ee2c6d9fbac874484 +Subproject commit 11fd25c25e8d593ddfb4e0d29281aaf63898d9ba From 4a589e21f2f7fce6748a8eec6b38548b69e63d06 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:04:14 +0800 Subject: [PATCH 020/136] Update submodule cudf to 399efb960f689085bf671f6fa62916b1020e3b30 (#1312) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 11fd25c25e..399efb960f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 11fd25c25e8d593ddfb4e0d29281aaf63898d9ba +Subproject commit 399efb960f689085bf671f6fa62916b1020e3b30 From 512c9c9f2e83ed72f82ce269ec0dd2152a16ea5e Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 3 Aug 2023 21:54:09 -0500 Subject: [PATCH 021/136] Change cudf::offset_type to cudf::size_type (#1315) Signed-off-by: Jason Lowe --- src/main/cpp/src/cast_decimal_to_string.cu | 2 +- src/main/cpp/src/cast_string.cu | 14 +++++++------- src/main/cpp/src/cast_string_to_float.cu | 16 ++++++++-------- src/main/cpp/src/map_utils.cu | 6 +++--- src/main/cpp/src/map_utils_debug.cuh | 4 ++-- src/main/cpp/src/row_conversion.cu | 2 +- src/main/cpp/tests/bloom_filter.cu | 4 ++-- thirdparty/cudf | 2 +- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/main/cpp/src/cast_decimal_to_string.cu b/src/main/cpp/src/cast_decimal_to_string.cu index a256e8e917..0cd2713a2f 100644 --- a/src/main/cpp/src/cast_decimal_to_string.cu +++ b/src/main/cpp/src/cast_decimal_to_string.cu @@ -52,7 +52,7 @@ namespace { template struct decimal_to_non_ansi_string_fn { column_device_view d_decimals; - offset_type* d_offsets{}; + size_type* d_offsets{}; char* d_chars{}; /** diff --git a/src/main/cpp/src/cast_string.cu b/src/main/cpp/src/cast_string.cu index 
6f9de63d10..59a27a59b7 100644 --- a/src/main/cpp/src/cast_string.cu +++ b/src/main/cpp/src/cast_string.cu @@ -159,7 +159,7 @@ template void __global__ string_to_integer_kernel(T* out, bitmask_type* validity, const char* const chars, - offset_type const* offsets, + size_type const* offsets, bitmask_type const* incoming_null_mask, size_type num_rows, bool ansi_mode, @@ -392,7 +392,7 @@ template __global__ void string_to_decimal_kernel(T* out, bitmask_type* validity, const char* const chars, - offset_type const* offsets, + size_type const* offsets, bitmask_type const* incoming_null_mask, size_type num_rows, int32_t scale, @@ -612,10 +612,10 @@ void validate_ansi_column(column_view const& col, thrust::make_counting_iterator(col.size()), row_valid_fn{col.null_mask(), source_col.null_mask()}); - offset_type string_bounds[2]; + size_type string_bounds[2]; cudaMemcpyAsync(&string_bounds, - &source_col.offsets().data()[*first_error], - sizeof(offset_type) * 2, + &source_col.offsets().data()[*first_error], + sizeof(size_type) * 2, cudaMemcpyDeviceToHost, stream.value()); stream.synchronize(); @@ -668,7 +668,7 @@ struct string_to_integer_impl { data.data(), null_mask.data(), string_col.chars().data(), - string_col.offsets().data(), + string_col.offsets().data(), string_col.null_mask(), string_col.size(), ansi_mode, @@ -737,7 +737,7 @@ struct string_to_decimal_impl { data.data(), null_mask.data(), string_col.chars().data(), - string_col.offsets().data(), + string_col.offsets().data(), string_col.null_mask(), string_col.size(), dtype.scale(), diff --git a/src/main/cpp/src/cast_string_to_float.cu b/src/main/cpp/src/cast_string_to_float.cu index 8eab9eef16..a84acffb28 100644 --- a/src/main/cpp/src/cast_string_to_float.cu +++ b/src/main/cpp/src/cast_string_to_float.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ class string_to_float { int32_t* ansi_except, size_type* valid_count, const char* const chars, - offset_type const* offsets, + size_type const* offsets, uint64_t const* const ipow, bitmask_type const* incoming_null_mask, size_type const num_rows) @@ -595,7 +595,7 @@ __global__ void string_to_float_kernel(T* out, int32_t* ansi_except, size_type* valid_count, const char* const chars, - offset_type const* offsets, + size_type const* offsets, bitmask_type const* incoming_null_mask, size_type const num_rows) { @@ -676,7 +676,7 @@ std::unique_ptr string_to_float(data_type dtype, ansi_mode ? static_cast(ansi_count.get())->data() : nullptr, static_cast(valid_count.get())->data(), string_col.chars().begin(), - string_col.offsets().begin(), + string_col.offsets().begin(), string_col.null_mask(), num_rows); } else { @@ -687,7 +687,7 @@ std::unique_ptr string_to_float(data_type dtype, ansi_mode ? 
static_cast(ansi_count.get())->data() : nullptr, static_cast(valid_count.get())->data(), string_col.chars().begin(), - string_col.offsets().begin(), + string_col.offsets().begin(), string_col.null_mask(), num_rows); } @@ -698,10 +698,10 @@ std::unique_ptr string_to_float(data_type dtype, auto const val = static_cast(ansi_count.get())->value(stream); if (val >= 0) { auto const error_row = num_rows - val; - offset_type string_bounds[2]; + size_type string_bounds[2]; cudaMemcpyAsync(&string_bounds, - &string_col.offsets().data()[error_row], - sizeof(offset_type) * 2, + &string_col.offsets().data()[error_row], + sizeof(size_type) * 2, cudaMemcpyDeviceToHost, stream.value()); stream.synchronize(); diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu index c5384e41b0..e3d31464ba 100644 --- a/src/main/cpp/src/map_utils.cu +++ b/src/main/cpp/src/map_utils.cu @@ -500,7 +500,7 @@ struct substring_fn { cudf::device_span const d_string; cudf::device_span const> const d_ranges; - cudf::offset_type* d_offsets{}; + cudf::size_type* d_offsets{}; char* d_chars{}; __device__ void operator()(cudf::size_type const idx) @@ -557,7 +557,7 @@ std::unique_ptr extract_keys_or_values( } // Compute the offsets for the final lists of Struct. 
-rmm::device_uvector compute_list_offsets( +rmm::device_uvector compute_list_offsets( cudf::size_type n_lists, rmm::device_uvector const& parent_node_ids, rmm::device_uvector const& key_or_value, @@ -599,7 +599,7 @@ rmm::device_uvector compute_list_offsets( print_debug(node_child_counts, "Nodes' child keys counts", ", ", stream); #endif - auto list_offsets = rmm::device_uvector(n_lists + 1, stream, mr); + auto list_offsets = rmm::device_uvector(n_lists + 1, stream, mr); auto const copy_end = cudf::detail::copy_if_safe( node_child_counts.begin(), node_child_counts.end(), diff --git a/src/main/cpp/src/map_utils_debug.cuh b/src/main/cpp/src/map_utils_debug.cuh index 39446b2971..43584d9fd5 100644 --- a/src/main/cpp/src/map_utils_debug.cuh +++ b/src/main/cpp/src/map_utils_debug.cuh @@ -108,7 +108,7 @@ void print_pair_debug(rmm::device_uvector const& input, } // Print the final output map data (Spark's MapType, i.e., List>). -void print_output_spark_map(rmm::device_uvector const& list_offsets, +void print_output_spark_map(rmm::device_uvector const& list_offsets, std::unique_ptr const& extracted_keys, std::unique_ptr const& extracted_values, rmm::cuda_stream_view stream) @@ -138,7 +138,7 @@ void print_output_spark_map(rmm::device_uvector const& list_o stream); auto const h_list_offsets = cudf::detail::make_host_vector_sync( - cudf::device_span{list_offsets.data(), list_offsets.size()}, stream); + cudf::device_span{list_offsets.data(), list_offsets.size()}, stream); CUDF_EXPECTS(h_list_offsets.back() == extracted_keys->size(), "Invalid list offsets computation."); diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu index 1960ce392b..c8826f98b4 100644 --- a/src/main/cpp/src/row_conversion.cu +++ b/src/main/cpp/src/row_conversion.cu @@ -183,7 +183,7 @@ struct tile_info { struct row_batch { size_type num_bytes; // number of bytes in this batch size_type row_count; // number of rows in the batch - device_uvector row_offsets; // offsets column 
of output cudf column + device_uvector row_offsets; // offsets column of output cudf column }; /** diff --git a/src/main/cpp/tests/bloom_filter.cu b/src/main/cpp/tests/bloom_filter.cu index 0139be6641..1ba3520f0f 100644 --- a/src/main/cpp/tests/bloom_filter.cu +++ b/src/main/cpp/tests/bloom_filter.cu @@ -118,7 +118,7 @@ TEST_F(BloomFilterTest, BuildAndProbeWithNulls) struct bloom_filter_stride_transform { int const stride; - cudf::offset_type __device__ operator()(cudf::size_type i) { return i * stride; } + cudf::size_type __device__ operator()(cudf::size_type i) { return i * stride; } }; TEST_F(BloomFilterTest, ProbeMerged) @@ -154,7 +154,7 @@ TEST_F(BloomFilterTest, ProbeMerged) thrust::transform(rmm::exec_policy(cudf::get_default_stream()), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + 4, - premerge_offsets->mutable_view().begin(), + premerge_offsets->mutable_view().begin(), bloom_filter_stride_transform{bloom_filter_a->view().size()}); auto premerged = cudf::make_lists_column( 3, std::move(premerge_offsets), std::move(premerge_children), 0, rmm::device_buffer{}); diff --git a/thirdparty/cudf b/thirdparty/cudf index 399efb960f..dcc8d9195d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 399efb960f689085bf671f6fa62916b1020e3b30 +Subproject commit dcc8d9195d62c64f0c132d0b67f9abf17207305e From 36f9edd66fc4e5369eae7a63c020f85561d4461e Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 4 Aug 2023 17:03:49 +0800 Subject: [PATCH 022/136] [submodule-sync] bot-submodule-sync-branch-23.10 to branch-23.10 [skip ci] [bot] (#1313) * Update submodule cudf to 9c559c94fcaa1525d7c95faf94e5486fcd992ef1 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to b7994bc16b1b1743b0743860b4f02ac4da8245d5 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to 
15cc5011902f1026e04662e725b880f48d38ba8d Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dcc8d9195d..15cc501190 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dcc8d9195d62c64f0c132d0b67f9abf17207305e +Subproject commit 15cc5011902f1026e04662e725b880f48d38ba8d From 2cb14965e5d2eccd451796d11e3a16892efa09d8 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 5 Aug 2023 05:03:37 +0800 Subject: [PATCH 023/136] Update submodule cudf to 8370cbe7f21872ea7106a7a8577a1b1a124608ff (#1316) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 15cc501190..8370cbe7f2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 15cc5011902f1026e04662e725b880f48d38ba8d +Subproject commit 8370cbe7f21872ea7106a7a8577a1b1a124608ff From 7c3d4ae45a835055fd47a5f755300d045a846e0b Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 5 Aug 2023 11:04:13 +0800 Subject: [PATCH 024/136] Update submodule cudf to a8ef4d4e49d584d1553283f6cf9aef17baaca62f (#1317) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8370cbe7f2..a8ef4d4e49 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8370cbe7f21872ea7106a7a8577a1b1a124608ff +Subproject commit a8ef4d4e49d584d1553283f6cf9aef17baaca62f From 71f15b3ea6bc8d7dca63da9c7c1c1b3dc83132c0 Mon Sep 17 00:00:00 2001 
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 8 Aug 2023 05:04:56 +0800 Subject: [PATCH 025/136] Update submodule cudf to e92de8113d186389ec867bd7957288b82e439da2 (#1318) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index a8ef4d4e49..e92de8113d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit a8ef4d4e49d584d1553283f6cf9aef17baaca62f +Subproject commit e92de8113d186389ec867bd7957288b82e439da2 From 9823f918638c2a76d48a394e434c4522f6a25b17 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 8 Aug 2023 11:04:00 +0800 Subject: [PATCH 026/136] Update submodule cudf to 9e099cef25b11821c6307bb9c231656a2bae700f (#1320) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e92de8113d..9e099cef25 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e92de8113d186389ec867bd7957288b82e439da2 +Subproject commit 9e099cef25b11821c6307bb9c231656a2bae700f From e58d5f4d332cd6af5ed5a7ed519db1ce62ce1ec7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Aug 2023 05:04:31 +0800 Subject: [PATCH 027/136] Update submodule cudf to 9b80bfdc71d68bb27646124f674aa2d15585fe97 (#1322) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9e099cef25..9b80bfdc71 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9e099cef25b11821c6307bb9c231656a2bae700f +Subproject commit 9b80bfdc71d68bb27646124f674aa2d15585fe97 
From 9e2b164f95662611e456ace575a5c1617607ab4c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Aug 2023 11:04:25 +0800 Subject: [PATCH 028/136] Update submodule cudf to ba6ff60aeaed59828770bd36a7026ad79776f30e (#1323) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9b80bfdc71..ba6ff60aea 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9b80bfdc71d68bb27646124f674aa2d15585fe97 +Subproject commit ba6ff60aeaed59828770bd36a7026ad79776f30e From 55d4d4ed9c45abdf1c29feb953fe1cea20df7c0d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Aug 2023 16:18:36 +0800 Subject: [PATCH 029/136] Update submodule cudf to edb25a84aaafa0d65d36ebb94113393d4b6474fb (#1324) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ba6ff60aea..edb25a84aa 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ba6ff60aeaed59828770bd36a7026ad79776f30e +Subproject commit edb25a84aaafa0d65d36ebb94113393d4b6474fb From 145f0feab64be6a9e825b7b48a4849abbbef267a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Aug 2023 20:19:04 +0800 Subject: [PATCH 030/136] Update submodule cudf to da6ac73e8849a6d5d7471f8aad60a8fd1141fe22 (#1325) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index edb25a84aa..da6ac73e88 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
edb25a84aaafa0d65d36ebb94113393d4b6474fb +Subproject commit da6ac73e8849a6d5d7471f8aad60a8fd1141fe22 From 853cb98b2fdf82fa2da07ce51a895b7e02c845b1 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Aug 2023 21:48:46 +0000 Subject: [PATCH 031/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9d794877fd..da6ac73e88 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9d794877fdb7822ea2194e045aba45e0317cf577 +Subproject commit da6ac73e8849a6d5d7471f8aad60a8fd1141fe22 From 67ade9b651372a8f307a973d6dba6fc7d6cbaeda Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 10 Aug 2023 01:37:50 +0000 Subject: [PATCH 032/136] Auto-merge use submodule in BASE ref Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8150d38e08..da6ac73e88 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8150d38e080c8fb021921ade83fe3aa3be04b47d +Subproject commit da6ac73e8849a6d5d7471f8aad60a8fd1141fe22 From 719adc1968b53b8e82368832c3404cc21472d268 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 10 Aug 2023 11:04:34 +0800 Subject: [PATCH 033/136] Update submodule cudf to e8df03754021e3decfc6640b58bd7a0770b0c230 (#1327) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index da6ac73e88..e8df037540 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject 
commit da6ac73e8849a6d5d7471f8aad60a8fd1141fe22 +Subproject commit e8df03754021e3decfc6640b58bd7a0770b0c230 From d6e59369450cdfac9345a94cacb1db3d695115fc Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Aug 2023 05:05:57 +0800 Subject: [PATCH 034/136] Update submodule cudf to b743cc7bff33584434705d41ef452718abdc7ce6 (#1332) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e8df037540..b743cc7bff 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e8df03754021e3decfc6640b58bd7a0770b0c230 +Subproject commit b743cc7bff33584434705d41ef452718abdc7ce6 From 2c5cd20863745a411fc9cec9086d65a13f24c9de Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 12 Aug 2023 04:19:07 +0800 Subject: [PATCH 035/136] Update submodule cudf to 1050325fdbe9c474a22399c184ea6aa5119934c7 (#1333) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b743cc7bff..1050325fdb 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b743cc7bff33584434705d41ef452718abdc7ce6 +Subproject commit 1050325fdbe9c474a22399c184ea6aa5119934c7 From f8a5dc11255a0f3da2b3d3781c4361e83f03d24d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 12 Aug 2023 11:04:20 +0800 Subject: [PATCH 036/136] Update submodule cudf to bf9b1101c2cfd84f3f4d463331927090dc39ec9c (#1334) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1050325fdb..bf9b1101c2 
160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1050325fdbe9c474a22399c184ea6aa5119934c7 +Subproject commit bf9b1101c2cfd84f3f4d463331927090dc39ec9c From 1cba229847f4238556c3b1de83876e63ca485326 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 12 Aug 2023 17:04:46 +0800 Subject: [PATCH 037/136] Update submodule cudf to 989c4116a3b054f46a7f1152f234b2601d2583b0 (#1335) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bf9b1101c2..989c4116a3 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bf9b1101c2cfd84f3f4d463331927090dc39ec9c +Subproject commit 989c4116a3b054f46a7f1152f234b2601d2583b0 From fd5d66c18c4c98518c71f4ad47e9a4aa29dada55 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 14 Aug 2023 22:18:54 +0800 Subject: [PATCH 038/136] Update submodule cudf to 65e572dc2ae8b302dd658ce66f85f476f1334775 (#1339) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 989c4116a3..65e572dc2a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 989c4116a3b054f46a7f1152f234b2601d2583b0 +Subproject commit 65e572dc2ae8b302dd658ce66f85f476f1334775 From bdd2f2e67ddc99dfbbb36f10fee914353c7d0714 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 15 Aug 2023 04:18:37 +0800 Subject: [PATCH 039/136] Update submodule cudf to 5a9241681b9290f233e4f5470c1d4a4b394ac24b (#1341) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/thirdparty/cudf b/thirdparty/cudf index 65e572dc2a..5a9241681b 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 65e572dc2ae8b302dd658ce66f85f476f1334775 +Subproject commit 5a9241681b9290f233e4f5470c1d4a4b394ac24b From bd985d2cd76a3c01626f8b7e9eadf7401f14d42a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:04:44 +0800 Subject: [PATCH 040/136] Update submodule cudf to 8b72662abf8b044a176fa9aac082c1834686ef83 (#1343) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 5a9241681b..8b72662abf 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 5a9241681b9290f233e4f5470c1d4a4b394ac24b +Subproject commit 8b72662abf8b044a176fa9aac082c1834686ef83 From ce14288bc53d0ec3fc41263ea6cc15a644bdff1e Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:22:54 +0800 Subject: [PATCH 041/136] Update submodule cudf to 1d58d5f4744e191af1044d66ff06a2c62b79ab5e (#1345) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8b72662abf..1d58d5f474 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8b72662abf8b044a176fa9aac082c1834686ef83 +Subproject commit 1d58d5f4744e191af1044d66ff06a2c62b79ab5e From b99cf35468e650e4920896c719add78e32578b31 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Wed, 16 Aug 2023 10:52:19 +0800 Subject: [PATCH 042/136] Enable memcheck for jni unit tests (#1321) compute-sanitizer is a tool that can detect some GPU memory related issues; this PR enables it in memory check mode for the JNI unit tests.
Some explanation for the parameters of the Compute Sanitizer. --log-file should be used to avoid a corrupting output issue from the surefire plugin. --error-exitcode is used to fail the build process if any error is caught by the Compute Sanitizer. --launch-timeout is set to 10 minutes, and it should be enough since we monitor only the forked test processes. --------- Signed-off-by: Liangcai Li Co-authored-by: Jason Lowe --- .gitignore | 5 ++++ CONTRIBUTING.md | 34 ++++++++++++++++++++++++ build/sanitizer-java/bin/java | 25 ++++++++++++++++++ ci/nightly-build.sh | 3 ++- ci/premerge-build.sh | 5 ++-- pom.xml | 49 ++++++++++++++++++++++++++++++++--- 6 files changed, 115 insertions(+), 6 deletions(-) create mode 100755 build/sanitizer-java/bin/java diff --git a/.gitignore b/.gitignore index 67cc1b237a..23b84112cd 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,8 @@ target/ ## VSCode IDE .vscode + +#Generated files +cufile.log +rmm_log.txt +sanitizer_for_pid_*.log diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 817a9b9c07..1c045a137b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -224,6 +224,40 @@ in errors finding libraries. The script `build/run-in-docker` was created to hel situation. A test can be run directly using this script or the script can be run without any arguments to get into an interactive shell inside the container. ```build/run-in-docker target/cmake-build/gtests/ROW_CONVERSION``` + +#### Testing with Compute Sanitizer +[Compute Sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) is a +functional correctness checking suite included in the CUDA toolkit. The RAPIDS Accelerator JNI +supports leveraging the Compute Sanitizer in memcheck mode in the unit tests to help catch any kernels +that may be doing something incorrectly. To run the unit tests with the Compute Sanitizer, append the +`-DUSE_SANITIZER=ON` to the build command. e.g. 
+``` +> ./build/build-in-docker clean package -DUSE_SANITIZER=ON +``` + +The Compute Sanitizer writes its report to one or more log files named +`sanitizer_for_pid_<pid>.log` under the current workspace root path. + +Please note that not all the unit tests can run with Compute Sanitizer. For example, `RmmTest#testEventHandler`, +a problematic test, intentionally attempts an illegal, oversized allocation as part of the +test, but Compute Sanitizer will still report the errors and fail the whole build process. +`UnsafeMemoryAccessorTest` is for host memory only, so there is no need to run it with +Compute Sanitizer either. + +If you think your tests are not suitable for Compute Sanitizer, please add the JUnit5 tag (`@Tag("noSanitizer")`) +to the tests or the test class. +``` +@Tag("noSanitizer") +class ExceptionCaseTest { ... } + +# or for a single test +class NormalCaseTest { + + @Tag("noSanitizer") + public void testOneErrorCase(){ ... } +} +``` + ### Benchmarks Benchmarks exist for c++ benchmarks using NVBench and are in the `src/main/cpp/benchmarks` directory. To build these benchmarks requires the `-DBUILD_BENCHMARKS` build option. Once built, the benchmarks diff --git a/build/sanitizer-java/bin/java b/build/sanitizer-java/bin/java new file mode 100755 index 0000000000..4e1c991c1a --- /dev/null +++ b/build/sanitizer-java/bin/java @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This special Java executable is passed as the "jvm" configuration of the +# surefire plugin to intercept the forking of the test processes. Then +# the tests will run with the compute-sanitizer tool. +exec compute-sanitizer --tool memcheck \ + --launch-timeout 600 \ + --error-exitcode -2 \ + --log-file "./sanitizer_for_pid_%p.log" \ + java "$@" diff --git a/ci/nightly-build.sh b/ci/nightly-build.sh index b073abbae9..668d650c5c 100755 --- a/ci/nightly-build.sh +++ b/ci/nightly-build.sh @@ -31,4 +31,5 @@ ${MVN} clean package ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -DUSE_GDS=${USE_GDS} -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ - -DBUILD_TESTS=ON -Dcuda.version=$CUDA_VER + -DBUILD_TESTS=ON -Dcuda.version=$CUDA_VER \ + -DUSE_SANITIZER=ON diff --git a/ci/premerge-build.sh b/ci/premerge-build.sh index 9a3cd41184..a2b340cd5f 100755 --- a/ci/premerge-build.sh +++ b/ci/premerge-build.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -27,4 +27,5 @@ ${MVN} verify ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ - -DBUILD_TESTS=ON + -DBUILD_TESTS=ON \ + -DUSE_SANITIZER=ON diff --git a/pom.xml b/pom.xml index 507c191cdc..1b1787919c 100644 --- a/pom.xml +++ b/pom.xml @@ -141,6 +141,12 @@ ${junit.version} test + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + org.junit.jupiter junit-jupiter-params @@ -199,14 +205,51 @@ + + + + + + + test-with-sanitizer + + + USE_SANITIZER + ON + + + + + + maven-surefire-plugin + + + default-test + + test + + + !noSanitizer + ${project.basedir}/build/sanitizer-java/bin/java + + + + + sanitizer-excluded-cases-test + + test + + + noSanitizer + + non-empty-null-test test - -da:ai.rapids.cudf.AssertEmptyNulls - ColumnViewNonEmptyNullsTest + ${project.basedir}/build/sanitizer-java/bin/java @@ -250,7 +293,7 @@ - + test-cpp From 82c5b4c3bb66a043600cf658ee2ba04904fead4d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 16 Aug 2023 11:04:04 +0800 Subject: [PATCH 043/136] Update submodule cudf to 20c3aaba97d69bccd0e0bac90d57ec722437bd9b (#1348) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1d58d5f474..20c3aaba97 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1d58d5f4744e191af1044d66ff06a2c62b79ab5e +Subproject commit 20c3aaba97d69bccd0e0bac90d57ec722437bd9b From c9771e58ca9bc670684cfa2486608d6f9e9a47c4 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Wed, 16 Aug 2023 11:34:00 +0800 Subject: [PATCH 044/136] Update sanitizer settings for ci (#1347) Disable sanitizer for premerge and enable it for submodule sync-up Signed-off-by: Liangcai Li --- ci/premerge-build.sh | 3 +-- 
ci/submodule-sync.sh | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/premerge-build.sh b/ci/premerge-build.sh index a2b340cd5f..e3adc10b3e 100755 --- a/ci/premerge-build.sh +++ b/ci/premerge-build.sh @@ -27,5 +27,4 @@ ${MVN} verify ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ - -DBUILD_TESTS=ON \ - -DUSE_SANITIZER=ON + -DBUILD_TESTS=ON diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index c9cd6e5016..2b8b186152 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -70,7 +70,8 @@ ${MVN} verify ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ - -DBUILD_TESTS=ON + -DBUILD_TESTS=ON \ + -DUSE_SANITIZER=ON verify_status=$? set -e From df1ee00966db7b5650df416bfba0b41849a8f546 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 17 Aug 2023 05:09:27 +0800 Subject: [PATCH 045/136] Update submodule cudf to fdeababc65d41b52821a47ac025b627d59920f1d (#1350) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 20c3aaba97..fdeababc65 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 20c3aaba97d69bccd0e0bac90d57ec722437bd9b +Subproject commit fdeababc65d41b52821a47ac025b627d59920f1d From 8903a0cde3c980b0be87f866ca375481b8debaf5 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:09:26 +0800 Subject: [PATCH 046/136] Update submodule cudf to 4fd6dd7b960e497abd13127e8eb7939f168bce08 (#1351) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index fdeababc65..4fd6dd7b96 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit fdeababc65d41b52821a47ac025b627d59920f1d +Subproject commit 4fd6dd7b960e497abd13127e8eb7939f168bce08 From becb9737fa6b255cb35dc404310d376e17da5bb7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 17 Aug 2023 17:12:06 +0800 Subject: [PATCH 047/136] Update submodule cudf to 41f0caf53662cfde8146647574e705982eb558b1 (#1352) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 4fd6dd7b96..41f0caf536 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 4fd6dd7b960e497abd13127e8eb7939f168bce08 +Subproject commit 41f0caf53662cfde8146647574e705982eb558b1 From 72cb83757eb64d86b70c5e8db52b712444a021b8 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 17 Aug 2023 13:56:47 -0700 Subject: [PATCH 048/136] Add JNI backend for Spark SQL function `conv` for (hexa)decimals (#1314) Contributes to NVIDIA/spark-rapids#8511 POC supporting form/to radices 10 and 16 leveraging existing libcudf API Signed-off-by: Gera Shegalov --- src/main/cpp/src/CastStringJni.cpp | 108 +++++++++++++++ src/main/cpp/src/row_conversion.cu | 4 +- .../nvidia/spark/rapids/jni/CastStrings.java | 16 ++- .../spark/rapids/jni/CastStringsTest.java | 126 ++++++++++++++++-- 4 files changed, 241 insertions(+), 13 deletions(-) diff --git a/src/main/cpp/src/CastStringJni.cpp b/src/main/cpp/src/CastStringJni.cpp index f18e0de956..1250ad6159 100644 --- a/src/main/cpp/src/CastStringJni.cpp +++ b/src/main/cpp/src/CastStringJni.cpp @@ -15,6 +15,19 @@ */ #include "cast_string.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" @@ -111,4 +124,99 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal } CATCH_CAST_EXCEPTION(env, 0); } + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toIntegersWithBase( + JNIEnv* env, jclass, jlong input_column, jint base, jboolean ansi_enabled, jint j_dtype) +{ + JNI_NULL_CHECK(env, input_column, "input column is null", 0); + using namespace cudf; + try { + if (base != 10 && base != 16) { + auto const error_msg = "Bases supported 10, 16; Actual: " + std::to_string(base); + throw spark_rapids_jni::cast_error(0, error_msg); + } + + jni::auto_set_device(env); + auto const zero_scalar = numeric_scalar(0); + auto const res_data_type = jni::make_data_type(j_dtype, 0); + auto const input_view{*reinterpret_cast(input_column)}; + auto const validity_regex_str = [&] { + switch (base) { + case 10: return R"(^\s*(-?[0-9]+).*)"; break; + case 16: return R"(^\s*(-?[0-9a-fA-F]+).*)"; break; + default: throw spark_rapids_jni::cast_error(0, "INFEASIBLE"); break; + } + }(); + + auto const validity_regex = strings::regex_program::create(validity_regex_str); + auto const valid_rows = strings::matches_re(input_view, *validity_regex); + auto const prepped_table = strings::extract(input_view, *validity_regex); + const strings_column_view prepped_view{prepped_table->get_column(0)}; + auto int_col = [&] { + switch (base) { + case 10: { + return strings::to_integers(prepped_view, res_data_type); + } break; + case 16: { + auto const is_negative = strings::starts_with(prepped_view, string_scalar("-")); + auto const pos_vals = strings::hex_to_integers(prepped_view, res_data_type); + auto neg_vals = + binary_operation(zero_scalar, *pos_vals, binary_operator::SUB, res_data_type); + return copy_if_else(*neg_vals, *pos_vals, *is_negative); + } + default: { + throw spark_rapids_jni::cast_error(0, "INFEASIBLE"); + break; + } + } + }(); + + auto 
unmatched_implies_zero = copy_if_else(*int_col, zero_scalar, *valid_rows); + + // output nulls: original + all rows matching \s* + + auto const space_only_regex = strings::regex_program::create(R"(^\s*$)"); + auto const extra_null_rows = strings::matches_re(input_view, *space_only_regex); + auto const extra_mask = unary_operation(*extra_null_rows, unary_operator::NOT); + + auto const original_mask = mask_to_bools(input_view.null_mask(), 0, input_view.size()); + auto const new_mask = binary_operation( + *original_mask, *extra_mask, binary_operator::BITWISE_AND, data_type(type_id::BOOL8)); + + auto const [null_mask, null_count] = bools_to_mask(*new_mask); + unmatched_implies_zero->set_null_mask(*null_mask, null_count); + return jni::release_as_jlong(unmatched_implies_zero); + } + CATCH_CAST_EXCEPTION(env, 0); +} + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromIntegersWithBase( + JNIEnv* env, jclass, jlong input_column, jint base) +{ + JNI_NULL_CHECK(env, input_column, "input column is null", 0); + using namespace cudf; + try { + jni::auto_set_device(env); + auto input_view{*reinterpret_cast(input_column)}; + auto result = [&] { + switch (base) { + case 10: { + return strings::from_integers(input_view); + } break; + case 16: { + auto pre_res = strings::integers_to_hex(input_view); + auto const regex = strings::regex_program::create("^0?([0-9a-fA-F]+)$"); + auto const wo_leading_zeros = strings::extract(strings_column_view(*pre_res), *regex); + return std::move(wo_leading_zeros->release()[0]); + } + default: { + auto const error_msg = "Bases supported 10, 16; Actual: " + std::to_string(base); + throw spark_rapids_jni::cast_error(0, error_msg); + } + } + }(); + return jni::release_as_jlong(result); + } + CATCH_CAST_EXCEPTION(env, 0); +} } diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu index c8826f98b4..c1f94598d0 100644 --- a/src/main/cpp/src/row_conversion.cu +++ b/src/main/cpp/src/row_conversion.cu @@ 
-181,8 +181,8 @@ struct tile_info { * */ struct row_batch { - size_type num_bytes; // number of bytes in this batch - size_type row_count; // number of rows in the batch + size_type num_bytes; // number of bytes in this batch + size_type row_count; // number of rows in the batch device_uvector row_offsets; // offsets column of output cudf column }; diff --git a/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java b/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java index 4368d6e098..5df44c77c1 100644 --- a/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java +++ b/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java @@ -82,7 +82,7 @@ public static ColumnVector toDecimal(ColumnView cv, boolean ansiMode, boolean st /** * Convert a decimal column to a string column. - * + * * @param cv the column data to process * @return the converted column */ @@ -102,10 +102,24 @@ public static ColumnVector toFloat(ColumnView cv, boolean ansiMode, DType type) return new ColumnVector(toFloat(cv.getNativeView(), ansiMode, type.getTypeId().getNativeId())); } + + public static ColumnVector toIntegersWithBase(ColumnView cv, int base, + boolean ansiEnabled, DType type) { + return new ColumnVector(toIntegersWithBase(cv.getNativeView(), base, ansiEnabled, + type.getTypeId().getNativeId())); + } + + public static ColumnVector fromIntegersWithBase(ColumnView cv, int base) { + return new ColumnVector(fromIntegersWithBase(cv.getNativeView(), base)); + } + private static native long toInteger(long nativeColumnView, boolean ansi_enabled, boolean strip, int dtype); private static native long toDecimal(long nativeColumnView, boolean ansi_enabled, boolean strip, int precision, int scale); private static native long toFloat(long nativeColumnView, boolean ansi_enabled, int dtype); private static native long fromDecimal(long nativeColumnView); + private static native long toIntegersWithBase(long nativeColumnView, int base, + boolean ansiEnabled, int dtype); + private static 
native long fromIntegersWithBase(long nativeColumnView, int base); } \ No newline at end of file diff --git a/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java b/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java index 1c298231af..5f872ffdcb 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java @@ -16,20 +16,18 @@ package com.nvidia.spark.rapids.jni; -import ai.rapids.cudf.AssertUtils; -import ai.rapids.cudf.ColumnVector; -import ai.rapids.cudf.DType; -import ai.rapids.cudf.Table; -import com.nvidia.spark.rapids.jni.CastException; -import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.util.stream.IntStream; import java.util.ArrayList; import java.util.List; -import static org.junit.jupiter.api.Assertions.*; +import org.junit.jupiter.api.Test; + +import ai.rapids.cudf.AssertUtils; +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.DType; +import ai.rapids.cudf.Table; public class CastStringsTest { @Test @@ -194,4 +192,112 @@ void castToDecimalNoStripTest() { } } } + + + @Test + void baseDec2HexTest() { + try( + Table input = new Table.TestBuilder().column( + null, + " ", + "junk-510junk510", + "--510", + " -510junk510", + " 510junk510", + "510", + "00510", + "00-510" + ).build(); + + Table expected = new Table.TestBuilder().column( + null, + null, + "0", + "0", + "18446744073709551106", + "510", + "510", + "510", + "0" + ).column( + null, + null, + "0", + "0", + "FFFFFFFFFFFFFE02", + "1FE", + "1FE", + "1FE", + "0" + ).build(); + + ColumnVector intCol = CastStrings.toIntegersWithBase(input.getColumn(0), 10, false, + DType.UINT64); + ColumnVector decStrCol = CastStrings.fromIntegersWithBase(intCol, 10); + ColumnVector hexStrCol = CastStrings.fromIntegersWithBase(intCol, 16); + 
) { + ai.rapids.cudf.TableDebug.get().debug("intCol", intCol); + AssertUtils.assertColumnsAreEqual(expected.getColumn(0), decStrCol, "decStrCol"); + AssertUtils.assertColumnsAreEqual(expected.getColumn(1), hexStrCol, "hexStrCol"); + } + } + + @Test + void baseHex2DecTest() { + try( + Table input = new Table.TestBuilder().column( + null, + "junk", + "0", + "f", + "junk-5Ajunk5A", + "--5A", + " -5Ajunk5A", + " 5Ajunk5A", + "5a", + "05a", + "005a", + "00-5a", + "NzGGImWNRh" + ).build(); + + Table expected = new Table.TestBuilder().column( + null, + "0", + "0", + "15", + "0", + "0", + "18446744073709551526", + "90", + "90", + "90", + "90", + "0", + "0" + ).column( + null, + "0", + "0", + "F", + "0", + "0", + "FFFFFFFFFFFFFFA6", + "5A", + "5A", + "5A", + "5A", + "0", + "0" + ).build(); + + ColumnVector intCol = CastStrings.toIntegersWithBase(input.getColumn(0), 16, false, DType.UINT64); + ColumnVector decStrCol = CastStrings.fromIntegersWithBase(intCol, 10); + ColumnVector hexStrCol = CastStrings.fromIntegersWithBase(intCol, 16); + ) { + ai.rapids.cudf.TableDebug.get().debug("intCol", intCol); + AssertUtils.assertColumnsAreEqual(expected.getColumn(0), decStrCol, "decStrCol"); + AssertUtils.assertColumnsAreEqual(expected.getColumn(1), hexStrCol, "hexStrCol"); + } + } } From 991ec6dfccaf86c5f2d6f1e1b172d8913790b3de Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Aug 2023 05:10:38 +0800 Subject: [PATCH 049/136] Update submodule cudf to f543dfa1356f02ae6b581e3e2584fffccfc69c76 (#1354) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 41f0caf536..f543dfa135 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 41f0caf53662cfde8146647574e705982eb558b1 +Subproject commit f543dfa1356f02ae6b581e3e2584fffccfc69c76 From 
9a9cbfb3dfdbd42afd385caaa37472a9d56d1bb2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Aug 2023 10:45:24 +0800 Subject: [PATCH 050/136] Update submodule cudf to 28b5b6e59e49343496e441ea45a843b4dd8d1bf3 (#1355) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f543dfa135..28b5b6e59e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f543dfa1356f02ae6b581e3e2584fffccfc69c76 +Subproject commit 28b5b6e59e49343496e441ea45a843b4dd8d1bf3 From 50cc936394ccf697b75f2a7065e1ab12629e5371 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 19 Aug 2023 05:08:31 +0800 Subject: [PATCH 051/136] Update submodule cudf to b798a70d608cbbe2c7f372a8c21354455ba56f74 (#1356) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 28b5b6e59e..b798a70d60 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 28b5b6e59e49343496e441ea45a843b4dd8d1bf3 +Subproject commit b798a70d608cbbe2c7f372a8c21354455ba56f74 From 8ae64b7f6f38050263c6e39002c609c793ccca7a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 19 Aug 2023 11:09:14 +0800 Subject: [PATCH 052/136] Update submodule cudf to 263a85d70edbf08232beb3286c1a2d0f08afe76e (#1357) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b798a70d60..263a85d70e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
b798a70d608cbbe2c7f372a8c21354455ba56f74 +Subproject commit 263a85d70edbf08232beb3286c1a2d0f08afe76e From 436407818176b6670ff6b9044469db9c483941b7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 21 Aug 2023 22:20:51 +0800 Subject: [PATCH 053/136] Update submodule cudf to 5eee8ac988686dd8e0cc8328194055aa7579d9b6 (#1358) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 263a85d70e..5eee8ac988 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 263a85d70edbf08232beb3286c1a2d0f08afe76e +Subproject commit 5eee8ac988686dd8e0cc8328194055aa7579d9b6 From 684e69c72375f6d560dce72e18fa13c438ceb10d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Aug 2023 05:10:36 +0800 Subject: [PATCH 054/136] Update submodule cudf to 55a4ecf14d43dae92254805358e09d6a60010fc9 (#1359) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 5eee8ac988..55a4ecf14d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 5eee8ac988686dd8e0cc8328194055aa7579d9b6 +Subproject commit 55a4ecf14d43dae92254805358e09d6a60010fc9 From 70e01209ad0baad4422dd1f0e274904a01f28897 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:10:10 +0800 Subject: [PATCH 055/136] Update submodule cudf to 261bcb2a1f190ec3a6689258f548b1f2c3f49dd6 (#1360) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 55a4ecf14d..261bcb2a1f 160000 --- 
a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 55a4ecf14d43dae92254805358e09d6a60010fc9 +Subproject commit 261bcb2a1f190ec3a6689258f548b1f2c3f49dd6 From 74f24152bc6da7d78c97d3625a811be841e85503 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Aug 2023 22:22:09 +0800 Subject: [PATCH 056/136] Update submodule cudf to 595308b528dbcc6d409c28aa11d2f8c6fe1886ed (#1361) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 261bcb2a1f..595308b528 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 261bcb2a1f190ec3a6689258f548b1f2c3f49dd6 +Subproject commit 595308b528dbcc6d409c28aa11d2f8c6fe1886ed From 8c2e1383e31b351867507cdf98dc32a5fb857d42 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Aug 2023 05:10:02 +0800 Subject: [PATCH 057/136] Update submodule cudf to 0e5f9dbac252cd4a59b7d33967b8df4acbf99571 (#1362) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 595308b528..0e5f9dbac2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 595308b528dbcc6d409c28aa11d2f8c6fe1886ed +Subproject commit 0e5f9dbac252cd4a59b7d33967b8df4acbf99571 From 82a5c2c997052b06495726af7e6ba62485299328 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Aug 2023 11:09:15 +0800 Subject: [PATCH 058/136] Update submodule cudf to 62148b42718e6ddd466e4f31a03d8c98a48ed191 (#1365) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/thirdparty/cudf b/thirdparty/cudf index 0e5f9dbac2..62148b4271 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 0e5f9dbac252cd4a59b7d33967b8df4acbf99571 +Subproject commit 62148b42718e6ddd466e4f31a03d8c98a48ed191 From 853a79a1e8f6c7cbde6c74c35f5288b62c01dbdc Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 23 Aug 2023 10:19:22 -0700 Subject: [PATCH 059/136] Avoid using invlalid null_mask for zero null_count (#1364) Fixes #1363 - adds a unit test reproducing the issue - adds a null_count check Signed-off-by: Gera Shegalov --- src/main/cpp/src/CastStringJni.cpp | 25 +++++--- .../spark/rapids/jni/CastStringsTest.java | 64 +++++++++++++------ 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/main/cpp/src/CastStringJni.cpp b/src/main/cpp/src/CastStringJni.cpp index 1250ad6159..d09bc33e4c 100644 --- a/src/main/cpp/src/CastStringJni.cpp +++ b/src/main/cpp/src/CastStringJni.cpp @@ -150,9 +150,9 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toIntegersW auto const validity_regex = strings::regex_program::create(validity_regex_str); auto const valid_rows = strings::matches_re(input_view, *validity_regex); - auto const prepped_table = strings::extract(input_view, *validity_regex); - const strings_column_view prepped_view{prepped_table->get_column(0)}; - auto int_col = [&] { + auto const int_col = [&] { + auto const prepped_table = strings::extract(input_view, *validity_regex); + const strings_column_view prepped_view{prepped_table->get_column(0)}; switch (base) { case 10: { return strings::to_integers(prepped_view, res_data_type); @@ -174,14 +174,19 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toIntegersW auto unmatched_implies_zero = copy_if_else(*int_col, zero_scalar, *valid_rows); // output nulls: original + all rows matching \s* - auto const space_only_regex = strings::regex_program::create(R"(^\s*$)"); - auto const extra_null_rows = 
strings::matches_re(input_view, *space_only_regex); - auto const extra_mask = unary_operation(*extra_null_rows, unary_operator::NOT); - - auto const original_mask = mask_to_bools(input_view.null_mask(), 0, input_view.size()); - auto const new_mask = binary_operation( - *original_mask, *extra_mask, binary_operator::BITWISE_AND, data_type(type_id::BOOL8)); + auto const new_mask = [&] { + auto const extra_null_rows = strings::matches_re(input_view, *space_only_regex); + auto extra_mask = unary_operation(*extra_null_rows, unary_operator::NOT); + if (input_view.null_count() > 0) { + return binary_operation(*mask_to_bools(input_view.null_mask(), 0, input_view.size()), + *extra_mask, + binary_operator::BITWISE_AND, + data_type(type_id::BOOL8)); + } else { + return extra_mask; + } + }(); auto const [null_mask, null_count] = bools_to_mask(*new_mask); unmatched_implies_zero->set_null_mask(*null_mask, null_count); diff --git a/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java b/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java index 5f872ffdcb..c39766454a 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/CastStringsTest.java @@ -193,10 +193,45 @@ void castToDecimalNoStripTest() { } } + private void convTestInternal(Table input, Table expected, int fromBase) { + try( + ColumnVector intCol = CastStrings.toIntegersWithBase(input.getColumn(0), fromBase, false, + DType.UINT64); + ColumnVector decStrCol = CastStrings.fromIntegersWithBase(intCol, 10); + ColumnVector hexStrCol = CastStrings.fromIntegersWithBase(intCol, 16); + ) { + AssertUtils.assertColumnsAreEqual(expected.getColumn(0), decStrCol, "decStrCol"); + AssertUtils.assertColumnsAreEqual(expected.getColumn(1), hexStrCol, "hexStrCol"); + } + } @Test - void baseDec2HexTest() { - try( + void baseDec2HexTestNoNulls() { + try ( + Table input = new Table.TestBuilder().column( + "510", + "00510", + "00-510" + ).build(); + + Table 
expected = new Table.TestBuilder().column( + "510", + "510", + "0" + ).column( + "1FE", + "1FE", + "0" + ).build() + ) + { + convTestInternal(input, expected, 10); + } + } + + @Test + void baseDec2HexTestMixed() { + try ( Table input = new Table.TestBuilder().column( null, " ", @@ -229,16 +264,10 @@ void baseDec2HexTest() { "1FE", "1FE", "0" - ).build(); - - ColumnVector intCol = CastStrings.toIntegersWithBase(input.getColumn(0), 10, false, - DType.UINT64); - ColumnVector decStrCol = CastStrings.fromIntegersWithBase(intCol, 10); - ColumnVector hexStrCol = CastStrings.fromIntegersWithBase(intCol, 16); - ) { - ai.rapids.cudf.TableDebug.get().debug("intCol", intCol); - AssertUtils.assertColumnsAreEqual(expected.getColumn(0), decStrCol, "decStrCol"); - AssertUtils.assertColumnsAreEqual(expected.getColumn(1), hexStrCol, "hexStrCol"); + ).build() + ) + { + convTestInternal(input, expected, 10); } } @@ -290,14 +319,9 @@ void baseHex2DecTest() { "0", "0" ).build(); - - ColumnVector intCol = CastStrings.toIntegersWithBase(input.getColumn(0), 16, false, DType.UINT64); - ColumnVector decStrCol = CastStrings.fromIntegersWithBase(intCol, 10); - ColumnVector hexStrCol = CastStrings.fromIntegersWithBase(intCol, 16); - ) { - ai.rapids.cudf.TableDebug.get().debug("intCol", intCol); - AssertUtils.assertColumnsAreEqual(expected.getColumn(0), decStrCol, "decStrCol"); - AssertUtils.assertColumnsAreEqual(expected.getColumn(1), hexStrCol, "hexStrCol"); + ) + { + convTestInternal(input, expected, 16); } } } From fbea92833e16d715e9ca556c798867fe4bf36fa7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Aug 2023 04:22:49 +0800 Subject: [PATCH 060/136] Update submodule cudf to 2700111e6b300cfff41b4e9137093bd22a00d1d4 (#1367) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 
62148b4271..2700111e6b 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 62148b42718e6ddd466e4f31a03d8c98a48ed191 +Subproject commit 2700111e6b300cfff41b4e9137093bd22a00d1d4 From ac898cee814bd004039ceb399e1296cdb2f092ec Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:10:02 +0800 Subject: [PATCH 061/136] Update submodule cudf to 171fc91a6e67e50cce8391457f92729044ddc86b (#1369) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2700111e6b..171fc91a6e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2700111e6b300cfff41b4e9137093bd22a00d1d4 +Subproject commit 171fc91a6e67e50cce8391457f92729044ddc86b From 8173b1799bd1d63529e8b6eeed57c8ffaa5864ef Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:09:16 +0800 Subject: [PATCH 062/136] Update submodule cudf to 83f9cbfbe629680f5e7c0de679bf94eb3f971159 (#1370) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 171fc91a6e..83f9cbfbe6 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 171fc91a6e67e50cce8391457f92729044ddc86b +Subproject commit 83f9cbfbe629680f5e7c0de679bf94eb3f971159 From 88c0a9fcad60ecfce5968f27d21c789b87c80e8f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Aug 2023 21:08:51 +0800 Subject: [PATCH 063/136] Update submodule cudf to f70f2cd415a1d8b2af2f0343fc2003101691f5c4 (#1371) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 83f9cbfbe6..f70f2cd415 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 83f9cbfbe629680f5e7c0de679bf94eb3f971159 +Subproject commit f70f2cd415a1d8b2af2f0343fc2003101691f5c4 From 5b819e66c676643b8fa9bf6a1ac614a693cf395d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 25 Aug 2023 11:10:16 +0800 Subject: [PATCH 064/136] Update submodule cudf to 6095a92395e32d96ad3595c5610d52b62886cc20 (#1372) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f70f2cd415..6095a92395 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f70f2cd415a1d8b2af2f0343fc2003101691f5c4 +Subproject commit 6095a92395e32d96ad3595c5610d52b62886cc20 From 03ff0ca34ec5e01a36243ecca10bf18b36f80fab Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 26 Aug 2023 05:09:05 +0800 Subject: [PATCH 065/136] Update submodule cudf to ec1e73f8d04563c95fb5e0eb775c2e8c65ee0d64 (#1373) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 6095a92395..ec1e73f8d0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6095a92395e32d96ad3595c5610d52b62886cc20 +Subproject commit ec1e73f8d04563c95fb5e0eb775c2e8c65ee0d64 From b9992cd47ee7d669a1283544ead1caf9ca23b6bc Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 26 Aug 2023 11:09:11 +0800 Subject: [PATCH 066/136] Update submodule cudf to b6d08cae87aa489706a1fc1eefde5c1efe3f3ebf (#1374) Signed-off-by: spark-rapids automation 
<70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ec1e73f8d0..b6d08cae87 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ec1e73f8d04563c95fb5e0eb775c2e8c65ee0d64 +Subproject commit b6d08cae87aa489706a1fc1eefde5c1efe3f3ebf From fc60aea91c7d8aa34ba47cb6e449710e84c87c02 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 26 Aug 2023 20:20:55 +0800 Subject: [PATCH 067/136] Update submodule cudf to a025db54a92ad967827ad6f6f2b251065fe09c73 (#1376) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b6d08cae87..a025db54a9 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b6d08cae87aa489706a1fc1eefde5c1efe3f3ebf +Subproject commit a025db54a92ad967827ad6f6f2b251065fe09c73 From ebe95426f9173f4bb444accd4e38ce379ba3efb8 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 28 Aug 2023 17:09:03 +0800 Subject: [PATCH 068/136] Update submodule cudf to 2c7f02c399e58538a7f772e86839c05d3e80ca19 (#1378) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index a025db54a9..2c7f02c399 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit a025db54a92ad967827ad6f6f2b251065fe09c73 +Subproject commit 2c7f02c399e58538a7f772e86839c05d3e80ca19 From 62f97fc26c60f80fb251e838c0f45498acbb2187 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 28 Aug 2023 20:21:07 +0800 Subject: [PATCH 069/136] Update submodule cudf to 
aba001c12f8db876ab7b763fcde939dba9efd665 (#1379) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2c7f02c399..aba001c12f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2c7f02c399e58538a7f772e86839c05d3e80ca19 +Subproject commit aba001c12f8db876ab7b763fcde939dba9efd665 From 24b5c39a5eda372fdf89ceffa64cbbad56ef2624 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 28 Aug 2023 22:23:32 +0800 Subject: [PATCH 070/136] Update submodule cudf to d138dd0c9c365e03891d33cf4423a553629a3f6b (#1380) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index aba001c12f..d138dd0c9c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit aba001c12f8db876ab7b763fcde939dba9efd665 +Subproject commit d138dd0c9c365e03891d33cf4423a553629a3f6b From 3fa7b9aa58f40458cab93edca5314af2a6241b68 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 29 Aug 2023 05:12:16 +0800 Subject: [PATCH 071/136] Update submodule cudf to 3c8ce98e00e5a2b686cda690620f2a519d2a8e3d (#1381) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d138dd0c9c..3c8ce98e00 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d138dd0c9c365e03891d33cf4423a553629a3f6b +Subproject commit 3c8ce98e00e5a2b686cda690620f2a519d2a8e3d From f9281d930fac0cca9eb3c30d8b26c24a67368224 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 29 Aug 
2023 11:17:11 +0800 Subject: [PATCH 072/136] Update submodule cudf to 70fbec809a45fb4d462d7f3ef22464d00d2640e0 (#1382) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3c8ce98e00..70fbec809a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3c8ce98e00e5a2b686cda690620f2a519d2a8e3d +Subproject commit 70fbec809a45fb4d462d7f3ef22464d00d2640e0 From 29c9565ba0731f614a319c5cced6c3df1a8ebd1c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Aug 2023 05:10:25 +0800 Subject: [PATCH 073/136] Update submodule cudf to e2e92c46741ea6ef71a657a2cdbc3c010497943e (#1383) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 70fbec809a..e2e92c4674 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 70fbec809a45fb4d462d7f3ef22464d00d2640e0 +Subproject commit e2e92c46741ea6ef71a657a2cdbc3c010497943e From 833eba493923a21dd8d781638e8ed875af48b85f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Aug 2023 11:08:28 +0800 Subject: [PATCH 074/136] Update submodule cudf to 14522003f3bbd8041e66b1ff34077acdae4869ba (#1384) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e2e92c4674..14522003f3 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e2e92c46741ea6ef71a657a2cdbc3c010497943e +Subproject commit 14522003f3bbd8041e66b1ff34077acdae4869ba From c38bc8e0c8474b328e8f424847fadc289b410551 Mon Sep 17 00:00:00 2001 From: Gera 
Shegalov Date: Wed, 30 Aug 2023 07:00:25 -0700 Subject: [PATCH 075/136] Use HostMemoryAllocator in ParquetFooter (#1377) * Use HostMemoryAllocator in ParquetFooter depends on rapidsai/cudf#13975 Signed-off-by: Gera Shegalov * Fix build Signed-off-by: Gera Shegalov --------- Signed-off-by: Gera Shegalov --- src/main/cpp/src/NativeParquetJni.cpp | 4 ++-- .../java/com/nvidia/spark/rapids/jni/ParquetFooter.java | 9 +++++++-- thirdparty/cudf | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/cpp/src/NativeParquetJni.cpp b/src/main/cpp/src/NativeParquetJni.cpp index c6d90be0cc..06f29f3f90 100644 --- a/src/main/cpp/src/NativeParquetJni.cpp +++ b/src/main/cpp/src/NativeParquetJni.cpp @@ -782,7 +782,7 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParquetFooter_getNumCol } JNIEXPORT jobject JNICALL Java_com_nvidia_spark_rapids_jni_ParquetFooter_serializeThriftFile( - JNIEnv* env, jclass, jlong handle) + JNIEnv* env, jclass, jlong handle, jobject host_memory_allocator) { CUDF_FUNC_RANGE(); try { @@ -798,7 +798,7 @@ JNIEXPORT jobject JNICALL Java_com_nvidia_spark_rapids_jni_ParquetFooter_seriali transportOut->getBuffer(&buf_ptr, &buf_size); // 12 extra is for the MAGIC thrift_footer length MAGIC - jobject ret = cudf::jni::allocate_host_buffer(env, buf_size + 12, false); + jobject ret = cudf::jni::allocate_host_buffer(env, buf_size + 12, false, host_memory_allocator); uint8_t* ret_addr = reinterpret_cast(cudf::jni::get_host_buffer_address(env, ret)); ret_addr[0] = 'P'; ret_addr[1] = 'A'; diff --git a/src/main/java/com/nvidia/spark/rapids/jni/ParquetFooter.java b/src/main/java/com/nvidia/spark/rapids/jni/ParquetFooter.java index cd836869eb..681a01d81d 100644 --- a/src/main/java/com/nvidia/spark/rapids/jni/ParquetFooter.java +++ b/src/main/java/com/nvidia/spark/rapids/jni/ParquetFooter.java @@ -103,8 +103,12 @@ private ParquetFooter(long handle) { * footer file. 
This will include the MAGIC PAR1 at the beginning and end and also the * length of the footer just before the PAR1 at the end. */ + public HostMemoryBuffer serializeThriftFile(HostMemoryAllocator hostMemoryAllocator) { + return serializeThriftFile(nativeHandle, hostMemoryAllocator); + } + public HostMemoryBuffer serializeThriftFile() { - return serializeThriftFile(nativeHandle); + return serializeThriftFile(DefaultHostMemoryAllocator.get()); } /** @@ -232,5 +236,6 @@ private static native long readAndFilter(long address, long length, private static native int getNumColumns(long nativeHandle); - private static native HostMemoryBuffer serializeThriftFile(long nativeHandle); + private static native HostMemoryBuffer serializeThriftFile(long nativeHandle, + HostMemoryAllocator hostMemoryAllocator); } diff --git a/thirdparty/cudf b/thirdparty/cudf index 14522003f3..7b9f4a1757 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 14522003f3bbd8041e66b1ff34077acdae4869ba +Subproject commit 7b9f4a17579befd902d1c30af38daa5fe493e335 From 4610dea47c7b0f300e952cb5d36a28a9dd7252a9 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 31 Aug 2023 05:10:28 +0800 Subject: [PATCH 076/136] [submodule-sync] bot-submodule-sync-branch-23.10 to branch-23.10 [skip ci] [bot] (#1386) * Update submodule cudf to 7b9f4a17579befd902d1c30af38daa5fe493e335 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to f999e1c5ed183253585606fdfc7552a224aee2d7 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7b9f4a1757..f999e1c5ed 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
7b9f4a17579befd902d1c30af38daa5fe493e335 +Subproject commit f999e1c5ed183253585606fdfc7552a224aee2d7 From 5ac46b07271db5f3c3d78b1486808bb26152e35e Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 31 Aug 2023 11:10:28 +0800 Subject: [PATCH 077/136] Update submodule cudf to c73ff70dc5ad85d71a0719606c688c2447d55d85 (#1389) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f999e1c5ed..c73ff70dc5 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f999e1c5ed183253585606fdfc7552a224aee2d7 +Subproject commit c73ff70dc5ad85d71a0719606c688c2447d55d85 From 10b8ebf6beb28b661d9769b009f2b9d819ee66f7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 1 Sep 2023 11:09:06 +0800 Subject: [PATCH 078/136] Update submodule cudf to ad9fa501192332ca8ce310ffe967473ec0945a97 (#1390) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c73ff70dc5..ad9fa50119 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c73ff70dc5ad85d71a0719606c688c2447d55d85 +Subproject commit ad9fa501192332ca8ce310ffe967473ec0945a97 From 7e6dfc054d04cebdc3968902f9f109cf52313301 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 1 Sep 2023 17:09:42 +0800 Subject: [PATCH 079/136] Update submodule cudf to 27e433ad837e72c71acd37376c98b2e5aeb450ad (#1391) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ad9fa50119..27e433ad83 160000 --- 
a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ad9fa501192332ca8ce310ffe967473ec0945a97 +Subproject commit 27e433ad837e72c71acd37376c98b2e5aeb450ad From 7884c9505f0e66906cd405f345ab7ede0e50564a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 2 Sep 2023 05:09:16 +0800 Subject: [PATCH 080/136] Update submodule cudf to d1fb671128a55f965a7db907e99d5b1a841c2213 (#1392) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 27e433ad83..d1fb671128 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 27e433ad837e72c71acd37376c98b2e5aeb450ad +Subproject commit d1fb671128a55f965a7db907e99d5b1a841c2213 From 7fcb24fd72cb5dabfbadde4301231d50b8c75523 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 2 Sep 2023 11:10:24 +0800 Subject: [PATCH 081/136] Update submodule cudf to 2b7294b9afe413b8f6b956dc5148452ca0161e7f (#1393) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d1fb671128..2b7294b9af 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d1fb671128a55f965a7db907e99d5b1a841c2213 +Subproject commit 2b7294b9afe413b8f6b956dc5148452ca0161e7f From 6e56ceeb797294ec7fc03b53f8eaf43be8daefbd Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 2 Sep 2023 17:09:57 +0800 Subject: [PATCH 082/136] Update submodule cudf to 0c829cc0b868c288c3591771d555617d4d978ce3 (#1394) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/thirdparty/cudf b/thirdparty/cudf index 2b7294b9af..0c829cc0b8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2b7294b9afe413b8f6b956dc5148452ca0161e7f +Subproject commit 0c829cc0b868c288c3591771d555617d4d978ce3 From 5fb97678c6b0c1ead16d5abd9ed6f92d4c2347fd Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 4 Sep 2023 17:09:04 +0800 Subject: [PATCH 083/136] Update submodule cudf to c51633627ee7087542ad4c315c0e139dea58e408 (#1395) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 0c829cc0b8..c51633627e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 0c829cc0b868c288c3591771d555617d4d978ce3 +Subproject commit c51633627ee7087542ad4c315c0e139dea58e408 From 444c1349dd9d2ee114773658c7322f12ced33e70 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 5 Sep 2023 05:09:02 +0800 Subject: [PATCH 084/136] Update submodule cudf to 3e5f019697252f6c300639a09eb67ff11a80ac43 (#1396) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c51633627e..3e5f019697 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c51633627ee7087542ad4c315c0e139dea58e408 +Subproject commit 3e5f019697252f6c300639a09eb67ff11a80ac43 From d44482f73828b3549234b8bb3f514552a6a0a21c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 6 Sep 2023 11:09:09 +0800 Subject: [PATCH 085/136] Update submodule cudf to 0b01fe49c8d5963e7be07e6dac2b78f842461db3 (#1397) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- 
thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3e5f019697..0b01fe49c8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3e5f019697252f6c300639a09eb67ff11a80ac43 +Subproject commit 0b01fe49c8d5963e7be07e6dac2b78f842461db3 From 6fbf24ffed64f7c2bcb4da3c9a772ef028a19468 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:16:22 +0800 Subject: [PATCH 086/136] Update submodule cudf to c82a70807849188274d21b595d5ded818aad4464 (#1398) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 0b01fe49c8..c82a708078 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 0b01fe49c8d5963e7be07e6dac2b78f842461db3 +Subproject commit c82a70807849188274d21b595d5ded818aad4464 From 1d317ca28d1d20aa02c85c7e5f984e0feda4d21b Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Wed, 6 Sep 2023 11:17:43 -0700 Subject: [PATCH 087/136] Implement timestamp rebase from Gregorian to Julian calendars (#1375) * Add date-time utils * Working implementation * Rename vars * Fix error * Complete tests for day timestamp * Add tests for micros * Cleanup * Rename files * Adapt the changes due to renaming * Add more test * Cleanup * Cleanup and docs * Update file names * Fix return type Signed-off-by: Nghia Truong * Fix variable name Signed-off-by: Nghia Truong * Fix typo Signed-off-by: Nghia Truong * Setup C++ tests Signed-off-by: Nghia Truong * Fix namespace Signed-off-by: Nghia Truong * Add Java binding and Java tests Signed-off-by: Nghia Truong * Add comment and type checks Signed-off-by: Nghia Truong * Fix corner case when the input date is outside of all calendars Signed-off-by: Nghia Truong * Add more test 
Signed-off-by: Nghia Truong --------- Signed-off-by: Nghia Truong Signed-off-by: Nghia Truong --- src/main/cpp/CMakeLists.txt | 2 + src/main/cpp/src/DateTimeRebaseJni.cpp | 35 +++ src/main/cpp/src/datetime_rebase.cu | 230 ++++++++++++++++++ src/main/cpp/src/datetime_rebase.hpp | 23 ++ src/main/cpp/tests/CMakeLists.txt | 3 + src/main/cpp/tests/datetime_rebase.cpp | 125 ++++++++++ .../spark/rapids/jni/DateTimeRebase.java | 42 ++++ .../spark/rapids/jni/DateTimeRebaseTest.java | 68 ++++++ 8 files changed, 528 insertions(+) create mode 100644 src/main/cpp/src/DateTimeRebaseJni.cpp create mode 100644 src/main/cpp/src/datetime_rebase.cu create mode 100644 src/main/cpp/src/datetime_rebase.hpp create mode 100644 src/main/cpp/tests/datetime_rebase.cpp create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java create mode 100644 src/test/java/com/nvidia/spark/rapids/jni/DateTimeRebaseTest.java diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index ab1775554e..37b2579dd2 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -148,6 +148,7 @@ add_library( spark_rapids_jni SHARED src/BloomFilterJni.cpp src/CastStringJni.cpp + src/DateTimeRebaseJni.cpp src/DecimalUtilsJni.cpp src/HashJni.cpp src/MapUtilsJni.cpp @@ -159,6 +160,7 @@ add_library( src/cast_decimal_to_string.cu src/cast_string.cu src/cast_string_to_float.cu + src/datetime_rebase.cu src/decimal_utils.cu src/map_utils.cu src/murmur_hash.cu diff --git a/src/main/cpp/src/DateTimeRebaseJni.cpp b/src/main/cpp/src/DateTimeRebaseJni.cpp new file mode 100644 index 0000000000..a1725c6c8a --- /dev/null +++ b/src/main/cpp/src/DateTimeRebaseJni.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cudf_jni_apis.hpp" +#include "datetime_rebase.hpp" + +extern "C" { + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeRebase_rebaseGregorianToJulian( + JNIEnv *env, jclass, jlong input) { + JNI_NULL_CHECK(env, input, "input column is null", 0); + + try { + cudf::jni::auto_set_device(env); + auto const input_cv = reinterpret_cast(input); + auto output = spark_rapids_jni::rebase_gregorian_to_julian(*input_cv); + return reinterpret_cast(output.release()); + } + CATCH_STD(env, 0); +} + +} // extern "C" diff --git a/src/main/cpp/src/datetime_rebase.cu b/src/main/cpp/src/datetime_rebase.cu new file mode 100644 index 0000000000..5a7ab22ff8 --- /dev/null +++ b/src/main/cpp/src/datetime_rebase.cu @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "datetime_rebase.hpp" + +// +#include +#include +#include +#include +#include + +// +#include + +// +#include +#include + +namespace { + +// Convert a date in Julian calendar to the number of days since epoch. +__device__ __inline__ auto days_from_julian(cuda::std::chrono::year_month_day const &ymd) { + auto const month = static_cast(ymd.month()); + auto const day = static_cast(ymd.day()); + auto const year = static_cast(ymd.year()) - (month <= 2); + + // Follow the implementation from https://howardhinnant.github.io/date_algorithms.html + int32_t const era = (year >= 0 ? year : year - 3) / 4; + uint32_t const year_of_era = static_cast(year - era * 4); // [0, 3] + uint32_t const day_of_year = (153 * (month + (month > 2 ? -3 : 9)) + 2) / 5 + day - 1; // [0, 365] + uint32_t const day_of_era = year_of_era * 365 + day_of_year; // [0, 1460] + return era * 1461 + static_cast(day_of_era) - 719470; +} + +// Convert the given number of days since the epoch day 1970-01-01 to a local date in Proleptic +// Gregorian calendar, reinterpreting the result as in Julian calendar, then compute the number of +// days since the epoch from that Julian local date. +// This is to match with Apache Spark's `localRebaseGregorianToJulianDays` function. 
+std::unique_ptr gregorian_to_julian_days(cudf::column_view const &input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { + CUDF_EXPECTS(input.type().id() == cudf::type_id::TIMESTAMP_DAYS, + "The input column type must be day timestamp.", std::invalid_argument); + + auto output = cudf::make_timestamp_column(input.type(), input.size(), + cudf::detail::copy_bitmask(input, stream, mr), + input.null_count(), stream, mr); + + thrust::transform( + rmm::exec_policy(stream), thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + output->mutable_view().begin(), + [d_input = input.begin()] __device__(auto const idx) { + auto constexpr julian_end = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; + auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, + cuda::std::chrono::day{15}}; + + auto const days_ts = d_input[idx].time_since_epoch().count(); + auto const days_since_epoch = cuda::std::chrono::sys_days(cudf::duration_D{days_ts}); + + // Convert the input into local date in Proleptic Gregorian calendar. + auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); + if (ymd > julian_end && ymd < gregorian_start) { + // This is the same as rebasing from the local date given at `gregorian_start`. + return cudf::timestamp_D{cudf::duration_D{-141427}}; + } + + // Dates on or after `gregorian_start` are returned unchanged. + if (ymd >= gregorian_start) { + return d_input[idx]; + } + + // Reinterpret year/month/day as in Julian calendar then compute the days since epoch. + return cudf::timestamp_D{cudf::duration_D{days_from_julian(ymd)}}; + }); + + return output; +} + +/** + * @brief Struct storing the results of extracting time components from a timestamp.
+ */ +struct time_components { + int32_t hour; + int32_t minute; + int32_t second; + int32_t subsecond; +}; + +/** + * @brief Specialized modulo expression that handles negative values. + * + * @code{.pseudo} + * Examples: + * modulo(1,60) -> 1 + * modulo(-1,60) -> 59 + * @endcode + */ +__device__ __inline__ auto modulo_time(int64_t time, int64_t base) { + return static_cast(((time % base) + base) % base); +} + +/** + * @brief This function handles converting units by dividing and adjusting for negative values. + * + * @code{.pseudo} + * Examples: + * scale(-61,60) -> -2 + * scale(-60,60) -> -1 + * scale(-59,60) -> -1 + * scale( 59,60) -> 0 + * scale( 60,60) -> 1 + * scale( 61,60) -> 1 + * @endcode + */ +__device__ __inline__ int64_t scale_time(int64_t time, int64_t base) { + return (time - ((time < 0) * (base - 1L))) / base; +} + +int64_t constexpr MICROS_PER_SECOND = 1'000'000L; + +__device__ __inline__ time_components get_time_components(int64_t micros) { + auto const subsecond = modulo_time(micros, MICROS_PER_SECOND); + + // Convert microseconds to seconds. + micros = micros / MICROS_PER_SECOND - ((micros < 0) && (subsecond != 0)); + + auto const hour = modulo_time(scale_time(micros, 3600), 24); + auto const minute = modulo_time(scale_time(micros, 60), 60); + auto const second = modulo_time(micros, 60); + + return time_components{hour, minute, second, subsecond}; +} + +// Convert the given number of microseconds since the epoch day 1970-01-01T00:00:00Z to a local +// date-time in Proleptic Gregorian calendar, reinterpreting the result as in Julian calendar, then +// compute the number of microseconds since the epoch from that Julian local date-time. +// This is to match with Apache Spark's `rebaseGregorianToJulianMicros` function with timezone +// fixed to UTC. 
+std::unique_ptr gregorian_to_julian_micros(cudf::column_view const &input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { + CUDF_EXPECTS(input.type().id() == cudf::type_id::TIMESTAMP_MICROSECONDS, + "The input column type must be microsecond timestamp.", std::invalid_argument); + + auto output = cudf::make_timestamp_column(input.type(), input.size(), + cudf::detail::copy_bitmask(input, stream, mr), + input.null_count(), stream, mr); + + thrust::transform( + rmm::exec_policy(stream), thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + output->mutable_view().begin(), + [d_input = input.begin()] __device__(auto const idx) { + // This timestamp corresponds to October 15th, 1582 UTC. + // After this day, there is no difference in microsecond values between Gregorian + // and Julian calendars. + int64_t constexpr last_switch_gregorian_ts = -12219292800000000L; + + auto const micros_ts = d_input[idx].time_since_epoch().count(); + if (micros_ts >= last_switch_gregorian_ts) { + return d_input[idx]; + } + + // Convert the input into local date-time in Proleptic Gregorian calendar. + auto const days_since_epoch = cuda::std::chrono::sys_days(static_cast( + cuda::std::chrono::floor(cudf::duration_us(micros_ts)))); + auto const ymd = cuda::std::chrono::year_month_day(days_since_epoch); + auto const timeparts = get_time_components(micros_ts); + + auto constexpr julian_end = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, cuda::std::chrono::day{4}}; + auto constexpr gregorian_start = cuda::std::chrono::year_month_day{ + cuda::std::chrono::year{1582}, cuda::std::chrono::month{10}, + cuda::std::chrono::day{15}}; + + // Reinterpret the local date-time as in Julian calendar and compute microseconds since + // the epoch from that Julian local date-time. 
+ // If the input date falls in the gap between the two calendars, treat it as the local date + // given at `gregorian_start` (-141427 Julian days since epoch). + auto const julian_days = + (ymd > julian_end && ymd < gregorian_start) ? -141427 : days_from_julian(ymd); + int64_t result = (julian_days * 24L * 3600L) + (timeparts.hour * 3600L) + + (timeparts.minute * 60L) + timeparts.second; + result *= MICROS_PER_SECOND; // to microseconds + result += timeparts.subsecond; + + return cudf::timestamp_us{cudf::duration_us{result}}; + }); + + return output; +} + +} // namespace + +namespace spark_rapids_jni { + +std::unique_ptr rebase_gregorian_to_julian(cudf::column_view const &input) { + auto const type = input.type().id(); + CUDF_EXPECTS(type == cudf::type_id::TIMESTAMP_DAYS || + type == cudf::type_id::TIMESTAMP_MICROSECONDS, + "The input must be either day or microsecond timestamps to rebase."); + + if (input.size() == 0) { + return cudf::empty_like(input); + } + + auto const stream = cudf::get_default_stream(); + auto const mr = rmm::mr::get_current_device_resource(); + return type == cudf::type_id::TIMESTAMP_DAYS ? gregorian_to_julian_days(input, stream, mr) : + gregorian_to_julian_micros(input, stream, mr); +} + +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/datetime_rebase.hpp b/src/main/cpp/src/datetime_rebase.hpp new file mode 100644 index 0000000000..96c65a6417 --- /dev/null +++ b/src/main/cpp/src/datetime_rebase.hpp @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace spark_rapids_jni { + +std::unique_ptr rebase_gregorian_to_julian(cudf::column_view const &input); + +} // namespace spark_rapids_jni diff --git a/src/main/cpp/tests/CMakeLists.txt b/src/main/cpp/tests/CMakeLists.txt index a377c0c1f4..5b95291351 100644 --- a/src/main/cpp/tests/CMakeLists.txt +++ b/src/main/cpp/tests/CMakeLists.txt @@ -51,6 +51,9 @@ ConfigureTest(CAST_STRING ConfigureTest(CAST_DECIMAL_TO_STRING cast_decimal_to_string.cpp) +ConfigureTest(DATETIME_REBASE + datetime_rebase.cpp) + ConfigureTest(ROW_CONVERSION row_conversion.cpp) diff --git a/src/main/cpp/tests/datetime_rebase.cpp b/src/main/cpp/tests/datetime_rebase.cpp new file mode 100644 index 0000000000..647ab1656c --- /dev/null +++ b/src/main/cpp/tests/datetime_rebase.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using days_col = cudf::test::fixed_width_column_wrapper; +using micros_col = + cudf::test::fixed_width_column_wrapper; + +struct TimestampRebaseTest : public cudf::test::BaseFixture {}; + +TEST_F(TimestampRebaseTest, DayTimestamp) { + auto const ts_col = days_col{-719162, -354285, -141714, -141438, -141437, -141432, + -141427, -31463, -31453, -1, 0, 18335}; + + // Check the correctness of timestamp values. They should be the instants as given in ts_strings. + { + auto const ts_strings = + cudf::test::strings_column_wrapper{"0001-01-01", "1000-01-01", "1582-01-01", "1582-10-04", + "1582-10-05", // After Julian but before Gregorian + "1582-10-10", // After Julian but before Gregorian + "1582-10-15", // Gregorian cutover day + "1883-11-10", "1883-11-20", "1969-12-31", + "1970-01-01", // The epoch day + "2020-03-14"}; + auto const parsed_ts = + cudf::strings::to_timestamps(cudf::strings_column_view(ts_strings), + cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}, "%Y-%m-%d"); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(ts_col, *parsed_ts); + } + + // Check the rebased values. + { + auto const rebased = spark_rapids_jni::rebase_gregorian_to_julian(ts_col); + auto const expected = days_col{-719164, -354280, -141704, -141428, -141427, -141427, + -141427, -31463, -31453, -1, 0, 18335}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *rebased, cudf::test::debug_output_level::ALL_ERRORS); + } +} + +TEST_F(TimestampRebaseTest, DayTimestampOfNegativeYear) { + // Negative years cannot be parsed by cudf from strings. 
+ auto const ts_col = days_col{ + -1121294, // -1100-1-1 + -1100777, // -1044-3-5 + -735535 // -44-3-5 + }; + auto const rebased = spark_rapids_jni::rebase_gregorian_to_julian(ts_col); + auto const expected = days_col{-1121305, -1100787, -735537}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *rebased); +} + +TEST_F(TimestampRebaseTest, MicroTimestamp) { + auto const ts_col = + micros_col{-62135593076345679L, -30610213078876544L, -12244061221876544L, -12220243200000000L, + -12219639001448163L, -12219292799000001L, -45446999900L, 1L, + 1584178381500000L}; + + // Check the correctness of ts_val. It should be the instant as given in ts_string. + { + + auto const ts_string = cudf::test::strings_column_wrapper{ + "0001-01-01 01:02:03.654321", "1000-01-01 03:02:01.123456", + "1582-01-01 07:52:58.123456", "1582-10-04 00:00:00.000000", + "1582-10-10 23:49:58.551837", // After Julian but before Gregorian + "1582-10-15 00:00:00.999999", // Gregorian cutover day + "1969-12-31 11:22:33.000100", + "1970-01-01 00:00:00.000001", // The epoch day + "2020-03-14 09:33:01.500000"}; + auto const parsed_ts = cudf::strings::to_timestamps( + cudf::strings_column_view(ts_string), + cudf::data_type{cudf::type_id::TIMESTAMP_MICROSECONDS}, "%Y-%m-%d %H:%M:%S.%6fz"); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(ts_col, *parsed_ts); + } + + // Check the rebased values. + { + auto const rebased = spark_rapids_jni::rebase_gregorian_to_julian(ts_col); + auto const expected = micros_col{ + -62135765876345679L, -30609781078876544L, -12243197221876544L, -12219379200000000L, + -12219207001448163L, -12219292799000001L, -45446999900L, 1L, + 1584178381500000L}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *rebased); + } +} + +TEST_F(TimestampRebaseTest, MicroTimestampOfNegativeYear) { + auto const ts_col = micros_col{ + -93755660276345679L, //-1001-01-01T01:02:03.654321 + -219958671476876544L, //-5001-10-15T01:02:03.123456 + -62188210676345679L //-0001-05-03T01:02:03.654321 + }; + + // Check the rebased values. 
+ { + auto const rebased = spark_rapids_jni::rebase_gregorian_to_julian(ts_col); + auto const expected = + micros_col{-93756524276345679L, -219962127476876544L, -62188383476345679L}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *rebased); + } +} diff --git a/src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java b/src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java new file mode 100644 index 0000000000..007a6765d4 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import ai.rapids.cudf.*; + +/** + * Utility class for rebasing date and timestamp columns from the Proleptic Gregorian calendar to the Julian calendar + */ +public class DateTimeRebase { + static { + NativeDepsLoader.loadNativeDeps(); + } + + /** + * Convert the given timestamps as a number of days or microseconds since the epoch instant + * 1970-01-01T00:00:00Z to a local date-time in Proleptic Gregorian calendar, reinterpreting + * the result as in Julian calendar, then compute the number of days or microseconds since the + * epoch from that Julian local date-time. + * This is to match with Apache Spark's `localRebaseGregorianToJulianDays` and + * `rebaseGregorianToJulianMicros` functions with timezone fixed to UTC.
+ */ + public static ColumnVector rebaseGregorianToJulian(ColumnView input) { + return new ColumnVector(rebaseGregorianToJulian(input.getNativeView())); + } + + private static native long rebaseGregorianToJulian(long nativeHandle); +} diff --git a/src/test/java/com/nvidia/spark/rapids/jni/DateTimeRebaseTest.java b/src/test/java/com/nvidia/spark/rapids/jni/DateTimeRebaseTest.java new file mode 100644 index 0000000000..80a891ee1e --- /dev/null +++ b/src/test/java/com/nvidia/spark/rapids/jni/DateTimeRebaseTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.jni; + +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; + +import org.junit.jupiter.api.Test; + +import ai.rapids.cudf.ColumnVector; + +public class DateTimeRebaseTest { + @Test + void dayTimestampTest() { + try (ColumnVector input = ColumnVector.timestampDaysFromBoxedInts(-719162, -354285, null, + -141714, -141438, -141437, + null, null, + -141432, -141427, -31463, -31453, -1, 0, 18335); + ColumnVector expected = ColumnVector.timestampDaysFromBoxedInts(-719164, -354280, null, + -141704, -141428, -141427, + null, null, + -141427, -141427, -31463, -31453, -1, 0, 18335); + ColumnVector result = DateTimeRebase.rebaseGregorianToJulian(input)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void microsecondTimestampTest() { + try (ColumnVector input = ColumnVector.timestampMicroSecondsFromBoxedLongs(-62135593076345679L, + -30610213078876544L, + null, + -12244061221876544L, + -12220243200000000L, + -12219292799000001L, + -45446999900L, + 1L, + null, + 1584178381500000L); + ColumnVector expected = + ColumnVector.timestampMicroSecondsFromBoxedLongs(-62135765876345679L, + -30609781078876544L, + null, + -12243197221876544L, + -12219379200000000L, + -12219292799000001L, + -45446999900L, + 1L, + null, + 1584178381500000L); + ColumnVector result = DateTimeRebase.rebaseGregorianToJulian(input)) { + assertColumnsAreEqual(expected, result); + } + } +} From 5a29f7d0bbc9b127e98343a6aab3306c5d2d67ae Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 7 Sep 2023 05:09:20 +0800 Subject: [PATCH 088/136] Update submodule cudf to 609f894fcd53b99acf0889562e78e706cb7812d8 (#1399) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c82a708078..609f894fcd 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ 
-Subproject commit c82a70807849188274d21b595d5ded818aad4464 +Subproject commit 609f894fcd53b99acf0889562e78e706cb7812d8 From 5991f801f0f5ae53f54da309b6def03bdf866a32 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 7 Sep 2023 11:09:08 +0800 Subject: [PATCH 089/136] Update submodule cudf to 0190c2921d0278f80328240b76a22e6628cb24f7 (#1400) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 609f894fcd..0190c2921d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 609f894fcd53b99acf0889562e78e706cb7812d8 +Subproject commit 0190c2921d0278f80328240b76a22e6628cb24f7 From 678b168d20da3f181bd742af2e0c94f3abdbafbf Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 7 Sep 2023 17:09:20 +0800 Subject: [PATCH 090/136] Update submodule cudf to dd6553a22d6cfcc2f017775a57d7b49783d62a9c (#1401) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 0190c2921d..dd6553a22d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 0190c2921d0278f80328240b76a22e6628cb24f7 +Subproject commit dd6553a22d6cfcc2f017775a57d7b49783d62a9c From e8a8a7d6a777721fee4f73922993b8f5cf15f9cd Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 8 Sep 2023 05:15:09 +0800 Subject: [PATCH 091/136] Update submodule cudf to c9d88219ce6e920b8fad977ade437bf87d1d5099 (#1402) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 
dd6553a22d..c9d88219ce 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dd6553a22d6cfcc2f017775a57d7b49783d62a9c +Subproject commit c9d88219ce6e920b8fad977ade437bf87d1d5099 From 5000d88ae51ee4dd77539eed175e98da39b08c82 Mon Sep 17 00:00:00 2001 From: Peixin Date: Fri, 8 Sep 2023 08:37:05 +0800 Subject: [PATCH 092/136] Enable arm64 build (#1385) Signed-off-by: Peixin Li --- CONTRIBUTING.md | 25 ++++++------ ci/Dockerfile.multi | 76 +++++++++++++++++++++++++++++++++++++ ci/nightly-build.sh | 18 +++++++-- pom.xml | 11 +++++- src/main/cpp/CMakeLists.txt | 7 +++- 5 files changed, 120 insertions(+), 17 deletions(-) create mode 100644 ci/Dockerfile.multi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c045a137b..76e6571fbe 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,18 +71,19 @@ settings. If an explicit reconfigure of libcudf is needed (e.g.: when changing c The following build properties can be set on the Maven command-line (e.g.: `-DCPP_PARALLEL_LEVEL=4`) to control aspects of the build: -|Property Name |Description | Default | -|------------------------------------|---------------------------------------|---------| -|`CPP_PARALLEL_LEVEL` |Parallelism of the C++ builds | 10 | -|`GPU_ARCHS` |CUDA architectures to target | RAPIDS | -|`CUDF_USE_PER_THREAD_DEFAULT_STREAM`|CUDA per-thread default stream | ON | -|`RMM_LOGGING_LEVEL` |RMM logging control | OFF | -|`USE_GDS` |Compile with GPU Direct Storage support| OFF | -|`BUILD_TESTS` |Compile tests | OFF | -|`BUILD_BENCHMARKS` |Compile benchmarks | OFF | -|`libcudf.build.configure` |Force libcudf build to configure | false | -|`libcudf.clean.skip` |Whether to skip cleaning libcudf build | true | -|`submodule.check.skip` |Whether to skip checking git submodules| false | +| Property Name | Description | Default | +|--------------------------------------|-----------------------------------------|---------| +| `CPP_PARALLEL_LEVEL` | Parallelism of the C++ builds | 10 | +| 
`GPU_ARCHS` | CUDA architectures to target | RAPIDS | +| `CUDF_USE_PER_THREAD_DEFAULT_STREAM` | CUDA per-thread default stream | ON | +| `RMM_LOGGING_LEVEL` | RMM logging control | OFF | +| `USE_GDS` | Compile with GPU Direct Storage support | OFF | +| `BUILD_TESTS` | Compile tests | OFF | +| `BUILD_BENCHMARKS` | Compile benchmarks | OFF | +| `BUILD_FAULTINJ` | Compile fault injection | ON | +| `libcudf.build.configure` | Force libcudf build to configure | false | +| `libcudf.clean.skip` | Whether to skip cleaning libcudf build | true | +| `submodule.check.skip` | Whether to skip checking git submodules | false | ### Local testing of cross-repo contributions cudf, spark-rapids-jni, and spark-rapids diff --git a/ci/Dockerfile.multi b/ci/Dockerfile.multi new file mode 100644 index 0000000000..720c9bc4df --- /dev/null +++ b/ci/Dockerfile.multi @@ -0,0 +1,76 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +### +# JNI CI image for multi-platform build +# +# Arguments: CUDA_VERSION=11.8.0 +# +### +ARG CUDA_VERSION=11.8.0 +ARG OS_RELEASE=8 +# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host +# check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) +FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE +ARG TOOLSET_VERSION=11 +### Install basic requirements +RUN dnf install -y scl-utils +RUN dnf install -y gcc-toolset-${TOOLSET_VERSION} python39 +RUN dnf --enablerepo=powertools install -y zlib-devel maven tar wget patch ninja-build +# require git 2.18+ to keep consistent submodule operations +RUN dnf install -y git +## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins +RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids + +# 3.22.3+: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache +ARG CMAKE_VERSION=3.26.4 +# default as arm64 release +ARG CMAKE_ARCH=aarch64 +# aarch64 cmake for arm build +RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ + tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ + rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz +ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH + +# ccache for interactive builds +ARG CCACHE_VERSION=4.6 +RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \ + tar zxf ccache-${CCACHE_VERSION}.tar.gz && \ + rm ccache-${CCACHE_VERSION}.tar.gz && \ + cd ccache-${CCACHE_VERSION} && \ + mkdir build && \ + cd build && \ + scl enable gcc-toolset-${TOOLSET_VERSION} \ + "cmake .. 
\ + -DCMAKE_BUILD_TYPE=Release \ + -DZSTD_FROM_INTERNET=ON \ + -DREDIS_STORAGE_BACKEND=OFF && \ + cmake --build . --parallel 4 --target install" && \ + cd ../.. && \ + rm -rf ccache-${CCACHE_VERSION} + +## install a version of boost that is needed for arrow/parquet to work +RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \ + tar -xzf boost_1_79_0.tar.gz && \ + rm boost_1_79_0.tar.gz && \ + cd boost_1_79_0 && \ + ./bootstrap.sh --prefix=/usr/local && \ + ./b2 install --prefix=/usr/local --with-filesystem --with-system && \ + cd /usr/local && \ + rm -rf boost_1_79_0 + +# disable cuda container constraints to allow running w/ elder drivers on data-center GPUs +ENV NVIDIA_DISABLE_REQUIRE="true" diff --git a/ci/nightly-build.sh b/ci/nightly-build.sh index 668d650c5c..15254f6e64 100755 --- a/ci/nightly-build.sh +++ b/ci/nightly-build.sh @@ -26,10 +26,22 @@ MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" CUDA_VER=${CUDA_VER:-cuda`nvcc --version | sed -n 's/^.*release \([0-9]\+\)\..*$/\1/p'`} PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} USE_GDS=${USE_GDS:-ON} +USE_SANITIZER=${USE_SANITIZER:-ON} +BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} +ARM64=${ARM64:-false} + +profiles="source-javadoc" +if [ "${ARM64}" == "true" ]; then + profiles="${profiles},arm64" + USE_GDS="OFF" + USE_SANITIZER="OFF" + BUILD_FAULTINJ="OFF" +fi + ${MVN} clean package ${MVN_MIRROR} \ - -Psource-javadoc \ + -P${profiles} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -DUSE_GDS=${USE_GDS} -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ - -DBUILD_TESTS=ON -Dcuda.version=$CUDA_VER \ - -DUSE_SANITIZER=ON + -DBUILD_TESTS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \ + -DUSE_SANITIZER=${USE_SANITIZER} diff --git a/pom.xml b/pom.xml index 1b1787919c..a60fd498f1 100644 --- a/pom.xml +++ b/pom.xml @@ -83,9 +83,11 @@ OFF OFF OFF + ON false false cuda11 + ${cuda.version} 
${project.basedir}/thirdparty/cudf 3.2.4 5.8.1 @@ -332,6 +334,12 @@ + + arm64 + + ${cuda.version}-arm64 + + @@ -441,6 +449,7 @@ + maven-jar-plugin 3.0.2 - ${cuda.version} + ${jni.classifier} diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 37b2579dd2..964c6a3043 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -43,6 +43,7 @@ option(CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) option(BUILD_TESTS "Configure CMake to build tests" OFF) option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF) +option(BUILD_FAULTINJ "Configure CMake to build fault injection" ON) message( VERBOSE "SPARK_RAPIDS_JNI: Build with per-thread default stream: @@ -50,6 +51,7 @@ message( ) message(VERBOSE "SPARK_RAPIDS_JNI: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "SPARK_RAPIDS_JNI: Configure CMake to build (nvbench) benchmarks: ${BUILD_BENCHMARKS}") +message(VERBOSE "SPARK_RAPIDS_JNI: Configure CMake to build fault injection: ${BUILD_FAULTINJ}") set(SPARK_RAPIDS_JNI_CXX_FLAGS "") set(SPARK_RAPIDS_JNI_CUDA_FLAGS "") @@ -57,6 +59,7 @@ set(SPARK_RAPIDS_JNI_CXX_DEFINITIONS "") set(SPARK_RAPIDS_JNI_CUDA_DEFINITIONS "") set(SPARK_RAPIDS_JNI_BUILD_TESTS ${BUILD_TESTS}) set(SPARK_RAPIDS_JNI_BUILD_BENCHMARKS ${BUILD_BENCHMARKS}) +set(SPARK_RAPIDS_JNI_BUILD_FAULTINJ ${BUILD_FAULTINJ}) # Set RMM logging level set(RMM_LOGGING_LEVEL @@ -279,4 +282,6 @@ if(SPARK_RAPIDS_JNI_BUILD_BENCHMARKS) add_subdirectory(benchmarks) endif() -add_subdirectory(faultinj) +if(SPARK_RAPIDS_JNI_BUILD_FAULTINJ) + add_subdirectory(faultinj) +endif() From 1944a1ee5ed0f95674c391de7ca875e90e22c335 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 8 Sep 2023 10:23:26 +0800 Subject: [PATCH 093/136] Update submodule cudf to b4da39cfbe569e290ae42ca9cf8ff868d5788757 (#1403) Signed-off-by: 
spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c9d88219ce..b4da39cfbe 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c9d88219ce6e920b8fad977ade437bf87d1d5099 +Subproject commit b4da39cfbe569e290ae42ca9cf8ff868d5788757 From 56c349f48fed26a3253dd679e551ae1df87dac5c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 8 Sep 2023 21:08:45 +0800 Subject: [PATCH 094/136] Update submodule cudf to b2ab2566c155b4b753b14e5b5c013653b701148d (#1404) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b4da39cfbe..b2ab2566c1 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b4da39cfbe569e290ae42ca9cf8ff868d5788757 +Subproject commit b2ab2566c155b4b753b14e5b5c013653b701148d From 4c844d448762ce4b4db07cf10a64924c00b1def3 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 9 Sep 2023 05:14:12 +0800 Subject: [PATCH 095/136] Update submodule cudf to 01730c46a4f403fd5cf9245512c941176eef2428 (#1405) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b2ab2566c1..01730c46a4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b2ab2566c155b4b753b14e5b5c013653b701148d +Subproject commit 01730c46a4f403fd5cf9245512c941176eef2428 From 96d583ea66f48ef9b5c9ae49799bc22dd954a069 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 9 Sep 2023 11:08:39 +0800 Subject: [PATCH 096/136] Update submodule cudf 
to 886e189e4c3cbad258563f4ec5b0f41fc6e15b5e (#1406) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 01730c46a4..886e189e4c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 01730c46a4f403fd5cf9245512c941176eef2428 +Subproject commit 886e189e4c3cbad258563f4ec5b0f41fc6e15b5e From ba6eefaf040a7eae1a4322d9ee5cff27a6fe65da Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 12 Sep 2023 05:08:43 +0800 Subject: [PATCH 097/136] Update submodule cudf to bc304a29d244ad502fbdc6a304c5de0e99aeb57c (#1407) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 886e189e4c..bc304a29d2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 886e189e4c3cbad258563f4ec5b0f41fc6e15b5e +Subproject commit bc304a29d244ad502fbdc6a304c5de0e99aeb57c From c65e9e5f3af2d41021703b5d174838da8963ccc2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 12 Sep 2023 11:14:29 +0800 Subject: [PATCH 098/136] Update submodule cudf to c3bf70595210d684fd747a927e59abc739aea8cf (#1408) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bc304a29d2..c3bf705952 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bc304a29d244ad502fbdc6a304c5de0e99aeb57c +Subproject commit c3bf70595210d684fd747a927e59abc739aea8cf From 5681545fee267ebff9e319b3616a912960e5ae8f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 12 
Sep 2023 17:09:44 +0800 Subject: [PATCH 099/136] Update submodule cudf to 1911d33231ac9caeaf5310173bf6a47ffca35fe8 (#1409) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c3bf705952..1911d33231 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c3bf70595210d684fd747a927e59abc739aea8cf +Subproject commit 1911d33231ac9caeaf5310173bf6a47ffca35fe8 From 0707504c20e3d7eb6f658b81737f2f7def881b69 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 13 Sep 2023 05:09:00 +0800 Subject: [PATCH 100/136] Update submodule cudf to 72c958380f42dac5bd04492043cfd569fdcd5f0a (#1410) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1911d33231..72c958380f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1911d33231ac9caeaf5310173bf6a47ffca35fe8 +Subproject commit 72c958380f42dac5bd04492043cfd569fdcd5f0a From 8734858e877bf7beddb1da78f72fe4c69abec4cf Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 13 Sep 2023 11:09:38 +0800 Subject: [PATCH 101/136] Update submodule cudf to 3be772fc5560127ff0ba6ad99d1cf618176e57fd (#1411) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 72c958380f..3be772fc55 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 72c958380f42dac5bd04492043cfd569fdcd5f0a +Subproject commit 3be772fc5560127ff0ba6ad99d1cf618176e57fd From 238e97fb88d6b0520ae80467627baa29a8779007 Mon Sep 17 00:00:00 2001 From: 
Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 13 Sep 2023 17:17:34 +0800 Subject: [PATCH 102/136] Update submodule cudf to 99c77111a20a2aea849d234ebe4c36171dc885fc (#1412) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3be772fc55..99c77111a2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3be772fc5560127ff0ba6ad99d1cf618176e57fd +Subproject commit 99c77111a20a2aea849d234ebe4c36171dc885fc From 1bf1d2c79255964410d7c805acf6c774554af41f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 14 Sep 2023 05:08:43 +0800 Subject: [PATCH 103/136] Update submodule cudf to 1668c2caac27c5c92dfeddb20271b835b36c5615 (#1413) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 99c77111a2..1668c2caac 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 99c77111a20a2aea849d234ebe4c36171dc885fc +Subproject commit 1668c2caac27c5c92dfeddb20271b835b36c5615 From e80565b85b1a1ee783c15b6493741e0cac259e53 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 14 Sep 2023 11:09:07 +0800 Subject: [PATCH 104/136] Update submodule cudf to 1bfeee7575e137bc75741cb2caf015e55ecab2cd (#1414) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1668c2caac..1bfeee7575 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1668c2caac27c5c92dfeddb20271b835b36c5615 +Subproject commit 1bfeee7575e137bc75741cb2caf015e55ecab2cd From 
f4604808f20e057d051f09a9f197f47d6ab4461c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 16 Sep 2023 11:09:10 +0800 Subject: [PATCH 105/136] Update submodule cudf to 3b691f4be744ff1155df3634cd334211e738e37d (#1415) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1bfeee7575..3b691f4be7 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1bfeee7575e137bc75741cb2caf015e55ecab2cd +Subproject commit 3b691f4be744ff1155df3634cd334211e738e37d From a7de78e8f6dd93a931d7ccf4af538197c38e873c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:11:15 +0800 Subject: [PATCH 106/136] Update submodule cudf to 4467066c952111c0131383784d3eb6bf3248f0ac (#1417) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3b691f4be7..4467066c95 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3b691f4be744ff1155df3634cd334211e738e37d +Subproject commit 4467066c952111c0131383784d3eb6bf3248f0ac From 93e5a22fe3a6d02ff820ceb5e775c559477bcd9f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 19 Sep 2023 11:09:20 +0800 Subject: [PATCH 107/136] Update submodule cudf to bdc1f3a6e1f383cd689ba8e92903b89e49cdb8d8 (#1418) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 4467066c95..bdc1f3a6e1 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
4467066c952111c0131383784d3eb6bf3248f0ac +Subproject commit bdc1f3a6e1f383cd689ba8e92903b89e49cdb8d8 From 20710ef53c152e45bd77810f34d71b3a97196658 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 19 Sep 2023 23:08:59 +0800 Subject: [PATCH 108/136] Update submodule cudf to c016b58b24e63468e9110a6ca82adfc5fd61202d (#1419) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bdc1f3a6e1..c016b58b24 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bdc1f3a6e1f383cd689ba8e92903b89e49cdb8d8 +Subproject commit c016b58b24e63468e9110a6ca82adfc5fd61202d From 89d01385efd5e40e53e6bd48972d9fe9b960c5d6 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 20 Sep 2023 17:09:09 +0800 Subject: [PATCH 109/136] Update submodule cudf to 63d197fe029ff2b57f4e0c7ab975bb35f844fc25 (#1420) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c016b58b24..63d197fe02 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c016b58b24e63468e9110a6ca82adfc5fd61202d +Subproject commit 63d197fe029ff2b57f4e0c7ab975bb35f844fc25 From 5f1576aac47697e940e4f3df5bae7b318dcdeaaa Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 20 Sep 2023 23:08:57 +0800 Subject: [PATCH 110/136] Update submodule cudf to 2d4f22a9ab0709f808af9253097037e0eb5d00b1 (#1421) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 63d197fe02..2d4f22a9ab 160000 --- 
a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 63d197fe029ff2b57f4e0c7ab975bb35f844fc25 +Subproject commit 2d4f22a9ab0709f808af9253097037e0eb5d00b1 From 74d1e6f5a7dc561ba2b82da8084935411809d520 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 21 Sep 2023 05:08:40 +0800 Subject: [PATCH 111/136] Update submodule cudf to 40d4cc5565f600864c3b16f30d3d26fd4904deaf (#1422) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2d4f22a9ab..40d4cc5565 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2d4f22a9ab0709f808af9253097037e0eb5d00b1 +Subproject commit 40d4cc5565f600864c3b16f30d3d26fd4904deaf From 987dddc478bad0fc4371fdda779799fab0f6b210 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:08:18 +0800 Subject: [PATCH 112/136] Update submodule cudf to e87d2fc1df6105d802b300bad19a9937f8155613 (#1423) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 40d4cc5565..e87d2fc1df 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 40d4cc5565f600864c3b16f30d3d26fd4904deaf +Subproject commit e87d2fc1df6105d802b300bad19a9937f8155613 From 55eba26e3de7c8d8baa3d8c2e1e297aef044bed3 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 22 Sep 2023 05:29:54 +0800 Subject: [PATCH 113/136] Update submodule cudf to dcac6cc6a719e2caf1c461be32acd2f7e78308e2 (#1424) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/thirdparty/cudf b/thirdparty/cudf index e87d2fc1df..dcac6cc6a7 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e87d2fc1df6105d802b300bad19a9937f8155613 +Subproject commit dcac6cc6a719e2caf1c461be32acd2f7e78308e2 From a9be0bc3ab86c81d9cae72237448161db107156b Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:08:37 +0800 Subject: [PATCH 114/136] Update submodule cudf to f0ba8598dd9792e137ca7aa3a1b22dbb84393cc1 (#1425) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dcac6cc6a7..f0ba8598dd 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dcac6cc6a719e2caf1c461be32acd2f7e78308e2 +Subproject commit f0ba8598dd9792e137ca7aa3a1b22dbb84393cc1 From 0741aa4b3f8901ae097a05b18ee88e89f031dea1 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 22 Sep 2023 23:08:11 +0800 Subject: [PATCH 115/136] Update submodule cudf to dd58dc4e9dae387c878afbe6cb32a311ce76fe68 (#1426) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f0ba8598dd..dd58dc4e9d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f0ba8598dd9792e137ca7aa3a1b22dbb84393cc1 +Subproject commit dd58dc4e9dae387c878afbe6cb32a311ce76fe68 From 75854de35c536c906987001109b24a373d0947d9 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 23 Sep 2023 05:08:41 +0800 Subject: [PATCH 116/136] Update submodule cudf to c7dd6b48684028a65b1d19d5d5b04060f6a4fe19 (#1427) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- 
thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dd58dc4e9d..c7dd6b4868 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dd58dc4e9dae387c878afbe6cb32a311ce76fe68 +Subproject commit c7dd6b48684028a65b1d19d5d5b04060f6a4fe19 From fa79c0b93c5e0eebcf432b9f45020149ac9e0e52 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 23 Sep 2023 11:08:22 +0800 Subject: [PATCH 117/136] Update submodule cudf to 71f30bec80194e8711156cea90d09b4ee0c940bd (#1428) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c7dd6b4868..71f30bec80 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c7dd6b48684028a65b1d19d5d5b04060f6a4fe19 +Subproject commit 71f30bec80194e8711156cea90d09b4ee0c940bd From 0396245420e2a3113a59174312f96bbd8fc0b688 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 23 Sep 2023 17:08:29 +0800 Subject: [PATCH 118/136] Update submodule cudf to d67cc5d05a6c18dd832f7b63421296fb66ae56f1 (#1429) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 71f30bec80..d67cc5d05a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 71f30bec80194e8711156cea90d09b4ee0c940bd +Subproject commit d67cc5d05a6c18dd832f7b63421296fb66ae56f1 From c6acb1667f3e1e16b1690c05422920a95c35f4ef Mon Sep 17 00:00:00 2001 From: Peixin Date: Mon, 25 Sep 2023 09:36:05 +0800 Subject: [PATCH 119/136] Automerge from 23.10 to 23.12 (#1431) Signed-off-by: Peixin Li --- .github/workflows/auto-merge.yml | 6 +++--- 1 file changed, 3 insertions(+), 
3 deletions(-) diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index beaa9a64c6..ee55236986 100755 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -18,12 +18,12 @@ name: auto-merge HEAD to BASE on: pull_request_target: branches: - - branch-23.08 + - branch-23.10 types: [closed] env: - HEAD: branch-23.08 - BASE: branch-23.10 + HEAD: branch-23.10 + BASE: branch-23.12 jobs: auto-merge: From e07e9ae9e0ca40390f9187844ccc8548a1d7f84b Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 26 Sep 2023 01:38:51 +0800 Subject: [PATCH 120/136] Update submodule cudf to 3f47b5d463445faa9f95b1cc57c46fb5b41f60a7 (#1434) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d67cc5d05a..3f47b5d463 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d67cc5d05a6c18dd832f7b63421296fb66ae56f1 +Subproject commit 3f47b5d463445faa9f95b1cc57c46fb5b41f60a7 From d6d57ad0e2f937779465394fb94d968b90354405 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 26 Sep 2023 06:38:34 +0800 Subject: [PATCH 121/136] Update submodule cudf to 1b925bfc7741eb22fed0a978fa0e1d0d5dfee601 (#1437) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3f47b5d463..1b925bfc77 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3f47b5d463445faa9f95b1cc57c46fb5b41f60a7 +Subproject commit 1b925bfc7741eb22fed0a978fa0e1d0d5dfee601 From 894d38c14b8962b40319924d3cdcd3ce158d9e3d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 26 Sep 2023 
10:38:51 +0800 Subject: [PATCH 122/136] Update submodule cudf to 2e1a17d6519ea018921e35075306e01b4fdddf72 (#1439) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1b925bfc77..2e1a17d651 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1b925bfc7741eb22fed0a978fa0e1d0d5dfee601 +Subproject commit 2e1a17d6519ea018921e35075306e01b4fdddf72 From 8ef59a09e4ba969c2a80dd7fb8a49ba366ac4f4d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 27 Sep 2023 02:20:55 +0800 Subject: [PATCH 123/136] Update submodule cudf to daea8c8bc37ec53b7347857a3b6795bcb0ad86ff (#1446) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 2e1a17d651..daea8c8bc3 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 2e1a17d6519ea018921e35075306e01b4fdddf72 +Subproject commit daea8c8bc37ec53b7347857a3b6795bcb0ad86ff From 7f8fe949804caaa497698fcd2b967d283725e535 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 27 Sep 2023 07:31:36 +0800 Subject: [PATCH 124/136] Update submodule cudf to 030c0f4995ec458fcfc00a4ebb3aa8bccb2b27a0 (#1449) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index daea8c8bc3..030c0f4995 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit daea8c8bc37ec53b7347857a3b6795bcb0ad86ff +Subproject commit 030c0f4995ec458fcfc00a4ebb3aa8bccb2b27a0 From 28d94b9937767ba4baaf238ea36949f3c8f2105a Mon Sep 17 00:00:00 2001 From: Jenkins 
Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 27 Sep 2023 11:19:58 +0800 Subject: [PATCH 125/136] Update submodule cudf to b25b292f7f97cbb681f0244e1a20b30a925145a1 (#1453) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 030c0f4995..b25b292f7f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 030c0f4995ec458fcfc00a4ebb3aa8bccb2b27a0 +Subproject commit b25b292f7f97cbb681f0244e1a20b30a925145a1 From d843f54878e4b5e0ba9cdbd6cfb5c2c24c47af2a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 28 Sep 2023 02:17:09 +0800 Subject: [PATCH 126/136] Update submodule cudf to 31e56702fe15f44b3e849207d31d0bb79c307367 (#1458) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b25b292f7f..31e56702fe 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b25b292f7f97cbb681f0244e1a20b30a925145a1 +Subproject commit 31e56702fe15f44b3e849207d31d0bb79c307367 From f32fe74027b25fdb64c997ca7515490a8c210072 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 28 Sep 2023 06:15:11 +0800 Subject: [PATCH 127/136] Update submodule cudf to b789d4ce3c090a3f25a8657d9a8582a1edb54f12 (#1461) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 31e56702fe..b789d4ce3c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 31e56702fe15f44b3e849207d31d0bb79c307367 +Subproject commit b789d4ce3c090a3f25a8657d9a8582a1edb54f12 From 
d0d058e249983e47f9e08df989197ab1310e3042 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 28 Sep 2023 13:13:07 -0700 Subject: [PATCH 128/136] Added javadocs to CastStrings.fromIntegersWithBase method [skip ci] (#1452) * added javadocs * Signing off Signed-off-by: Raza Jafri * addressed review comments * addressed review concern * fixed the docs with the correct hex value --------- Signed-off-by: Raza Jafri --- .../nvidia/spark/rapids/jni/CastStrings.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java b/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java index 5df44c77c1..eab42c41f6 100644 --- a/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java +++ b/src/main/java/com/nvidia/spark/rapids/jni/CastStrings.java @@ -109,6 +109,24 @@ public static ColumnVector toIntegersWithBase(ColumnView cv, int base, type.getTypeId().getNativeId())); } + /** + * Converts an integer column to a string column by converting the underlying integers to the + * specified base. + * + * Note: Right now we only support base 10 and 16. The hexadecimal values will be + * returned without leading zeros or padding at the end + * + * Example: + * input = [123, -1, 0, 27, 342718233] + * s = fromIntegersWithBase(input, 16) + * s is [ '7B', 'FFFFFFFF', '0', '1B', '146D7719'] + * s = fromIntegersWithBase(input, 10) + * s is ['123', '-1', '0', '27', '342718233'] + * + * @param cv The input integer column to be converted. 
+ * @param base base that we want to convert to (currently only 10/16) + * @return a new String ColumnVector + */ public static ColumnVector fromIntegersWithBase(ColumnView cv, int base) { return new ColumnVector(fromIntegersWithBase(cv.getNativeView(), base)); } From 54ef9991f46fa873d580315212aeae345da7152a Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 29 Sep 2023 07:21:26 +0800 Subject: [PATCH 129/136] [submodule-sync] bot-submodule-sync-branch-23.10 to branch-23.10 [skip ci] [bot] (#1464) * Update submodule cudf to 53f0f74f6c6d66441225278f19a69885fb8b43c6 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update submodule cudf to b2f00809f40e2e81b01214177b412456d40404cc Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b789d4ce3c..b2f00809f4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b789d4ce3c090a3f25a8657d9a8582a1edb54f12 +Subproject commit b2f00809f40e2e81b01214177b412456d40404cc From ea590b31740a847839dbef374e03e2aca0baa979 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 4 Oct 2023 01:10:39 +0800 Subject: [PATCH 130/136] Update submodule cudf to 66a655ce80e8b0accb80ea4e23799d23a82a35a2 (#1470) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b2f00809f4..66a655ce80 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b2f00809f40e2e81b01214177b412456d40404cc +Subproject commit 66a655ce80e8b0accb80ea4e23799d23a82a35a2 From 
e8219ce9c48d23ec4379e3c557c620579f3c2f43 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 4 Oct 2023 07:18:31 +0800 Subject: [PATCH 131/136] Update submodule cudf to 3964950ba2fecf7f962917276058a6381d396246 (#1472) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 66a655ce80..3964950ba2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 66a655ce80e8b0accb80ea4e23799d23a82a35a2 +Subproject commit 3964950ba2fecf7f962917276058a6381d396246 From 93f4d224917176bf7281c6fb608cedf6d27d7ee0 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 6 Oct 2023 11:01:33 -0500 Subject: [PATCH 132/136] handle rmm::out_of_memory instead of std::bad_alloc for retry (#1477) Signed-off-by: Robert (Bobby) Evans --- src/main/cpp/src/SparkResourceAdaptorJni.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/cpp/src/SparkResourceAdaptorJni.cpp b/src/main/cpp/src/SparkResourceAdaptorJni.cpp index bcbe8080ca..16c950d121 100644 --- a/src/main/cpp/src/SparkResourceAdaptorJni.cpp +++ b/src/main/cpp/src/SparkResourceAdaptorJni.cpp @@ -1352,7 +1352,10 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource { void* ret = resource->allocate(num_bytes, stream); post_alloc_success(tid, likely_spill); return ret; - } catch (const std::bad_alloc& e) { + } catch (const rmm::out_of_memory& e) { + // rmm::out_of_memory is what is thrown when an allocation failed + // but there are other rmm::bad_alloc exceptions that could be + // thrown as well, which are handled by the std::exception case. 
if (!post_alloc_failed(tid, true, likely_spill)) { throw; } } catch (const std::exception& e) { post_alloc_failed(tid, false, likely_spill); @@ -1360,7 +1363,7 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource { } } // we should never reach this point, but just in case - throw std::bad_alloc(); + throw rmm::bad_alloc("Internal Error"); } void do_deallocate(void* p, std::size_t size, rmm::cuda_stream_view stream) override From 299a0f3e9943b0d93c7a1704ed6d94e0aa097e1f Mon Sep 17 00:00:00 2001 From: Peixin Date: Tue, 10 Oct 2023 17:00:13 +0800 Subject: [PATCH 133/136] fix docker modified check in premerge (#1485) Signed-off-by: Peixin Li --- ci/Jenkinsfile.premerge | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile.premerge b/ci/Jenkinsfile.premerge index 8ae54f1781..72ef595a37 100644 --- a/ci/Jenkinsfile.premerge +++ b/ci/Jenkinsfile.premerge @@ -141,9 +141,10 @@ pipeline { container('cpu') { // check if pre-merge dockerfile modified def dockerfileModified = sh(returnStdout: true, - script: 'BASE=$(git --no-pager log --oneline -1 | awk \'{ print $NF }\'); ' + - 'git --no-pager diff --name-only HEAD $(git merge-base HEAD $BASE) ' + - "-- ${PREMERGE_DOCKERFILE} || true") +script: """BASE=\$(git --no-pager log --oneline -1 | awk \'{ print \$NF }\') +git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""").trim() + echo "$dockerfileModified" + if (!dockerfileModified?.trim()) { TEMP_IMAGE_BUILD = false } From 0dcc62fc16656f130125d12dc849fa75fce922b6 Mon Sep 17 00:00:00 2001 From: Peixin Li Date: Wed, 11 Oct 2023 12:02:15 +0800 Subject: [PATCH 134/136] update project version to release 23.10.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a60fd498f1..15734055f6 100644 --- a/pom.xml +++ b/pom.xml @@ -21,7 +21,7 @@ com.nvidia spark-rapids-jni - 23.10.0-SNAPSHOT + 23.10.0 jar RAPIDS Accelerator JNI for Apache Spark From 
a9002c11d3cd3bb39bcca69a7b2c34f6c7ddfae0 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 12 Oct 2023 02:18:46 +0800 Subject: [PATCH 135/136] Update submodule cudf to 135879368a8fcecda0a1d85bcf18b7e15cd0269d (#1491) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3964950ba2..135879368a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3964950ba2fecf7f962917276058a6381d396246 +Subproject commit 135879368a8fcecda0a1d85bcf18b7e15cd0269d From 796a4652629a6a37ed069e625004ce8c075434ca Mon Sep 17 00:00:00 2001 From: Peixin Date: Thu, 12 Oct 2023 09:20:58 +0800 Subject: [PATCH 136/136] Update cudf submodule ref to v23.10.00 (#1494) Signed-off-by: Peixin Li --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 135879368a..9f0c2f452f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 135879368a8fcecda0a1d85bcf18b7e15cd0269d +Subproject commit 9f0c2f452f1cf318c3f7fe2c6f7e07fc513fc335