From df52957cee6396713f1557f886b611506cf3df7b Mon Sep 17 00:00:00 2001 From: Vishwanatha-HD Date: Wed, 19 Nov 2025 19:29:15 +0530 Subject: [PATCH 1/4] GH-48151: [C++][Parquet] Fix arrow-acero-asof-join-node-test failures on s390x --- cpp/src/arrow/compute/util.cc | 53 +++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 28bbfb7072bc..4b90102dd9fd 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -31,34 +31,40 @@ namespace util { namespace bit_util { inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { - // This will not be correct on big-endian architectures. -#if !ARROW_LITTLE_ENDIAN - ARROW_DCHECK(false); -#endif ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8); if (num_bytes == 8) { return util::SafeLoad(reinterpret_cast(bytes)); } else { uint64_t word = 0; +#if ARROW_LITTLE_ENDIAN for (int i = 0; i < num_bytes; ++i) { word |= static_cast(bytes[i]) << (8 * i); } +#else + // Big-endian: most significant byte first + for (int i = 0; i < num_bytes; ++i) { + word |= static_cast(bytes[i]) << (8 * (num_bytes - 1 - i)); + } +#endif return word; } } inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { - // This will not be correct on big-endian architectures. -#if !ARROW_LITTLE_ENDIAN - ARROW_DCHECK(false); -#endif ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8); if (num_bytes == 8) { util::SafeStore(reinterpret_cast(bytes), value); } else { +#if ARROW_LITTLE_ENDIAN for (int i = 0; i < num_bytes; ++i) { bytes[i] = static_cast(value >> (8 * i)); } +#else + // Big-endian: most significant byte first + for (int i = 0; i < num_bytes; ++i) { + bytes[i] = static_cast(value >> (8 * (num_bytes - 1 - i))); + } +#endif } } @@ -119,7 +125,22 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, // Optionally process the last partial word with masking out bits outside range if (tail) { const uint8_t* bits_tail = bits + (num_bits - tail) / 8; +#if ARROW_LITTLE_ENDIAN uint64_t word = SafeLoadUpTo8Bytes(bits_tail, (tail + 7) / 8); +#else + int tail_bytes = (tail + 7) / 8; + uint64_t word; + if (tail_bytes == 8) { + word = util::SafeLoad(reinterpret_cast(bits_tail)); + } else { + // For bit manipulation, always load into least significant bits + // to ensure compatibility with CountTrailingZeros on Big-endian systems + word = 0; + for (int i = 0; i < tail_bytes; ++i) { + word |= static_cast(bits_tail[i]) << (8 * i); + } + } +#endif if (bit_to_search == 0) { word = ~word; } @@ -300,7 +321,21 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by } int tail = num_bits % unroll; if (tail) { - uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail); + uint64_t bytes_next; +#if ARROW_LITTLE_ENDIAN + bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail); +#else + if (tail == 8) { + bytes_next = util::SafeLoad(reinterpret_cast(bytes + num_bits - tail)); + } else { + // On Big-endian systems, for bytes_to_bits, load all tail bytes in little-endian order + // to ensure compatibility with subsequent bit operations + bytes_next = 0; + for (int i = 0; i < tail; ++i) { + bytes_next |= static_cast((bytes + num_bits - tail)[i]) << (8 * i); + } + } +#endif bytes_next &= 0x0101010101010101ULL; bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes From 12264eac0ecfefb2b8bdcaeab87ef13190c39ba0 Mon Sep 17 00:00:00 2001 From: Vishwanatha-HD Date: Wed, 19 Nov 2025 19:29:15 +0530 Subject: [PATCH 2/4] GH-48151: [C++][Parquet] Fix arrow-acero-asof-join-node-test failures on s390x --- cpp/src/arrow/compute/util.cc | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 4b90102dd9fd..273e20d86f6b 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -36,16 +36,9 @@ inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { return util::SafeLoad(reinterpret_cast(bytes)); } else { uint64_t word = 0; -#if ARROW_LITTLE_ENDIAN for (int i = 0; i < num_bytes; ++i) { word |= static_cast(bytes[i]) << (8 * i); } -#else - // Big-endian: most significant byte first - for (int i = 0; i < num_bytes; ++i) { - word |= static_cast(bytes[i]) << (8 * (num_bytes - 1 - i)); - } -#endif return word; } } @@ -325,15 +318,11 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by #if ARROW_LITTLE_ENDIAN bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail); #else - if (tail == 8) { - bytes_next = util::SafeLoad(reinterpret_cast(bytes + num_bits - tail)); - } else { - // On Big-endian systems, for bytes_to_bits, load all tail bytes in little-endian order - // to ensure compatibility with subsequent bit operations - bytes_next = 0; - for (int i = 0; i < tail; ++i) { - bytes_next |= static_cast((bytes + num_bits - tail)[i]) << (8 * i); - } + // On Big-endian systems, for bytes_to_bits, load all tail bytes in little-endian + // order to ensure compatibility with subsequent bit operations + bytes_next = 0; + for (int i = 0; i < tail; ++i) { + bytes_next |= static_cast((bytes + num_bits - tail)[i]) << (8 * i); } #endif bytes_next &= 0x0101010101010101ULL; From d816552b3eab95671dc7a22425c26e990a6b530c Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 10 Dec 2025 16:59:14 +0800 Subject: [PATCH 3/4] Add some fixes --- cpp/src/arrow/compute/util.cc | 46 +++++++++++++---------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 273e20d86f6b..cd0b9faa9466 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -37,7 +37,11 @@ inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { } else { uint64_t word = 0; for (int i = 0; i < num_bytes; ++i) { +#if ARROW_LITTLE_ENDIAN word |= static_cast(bytes[i]) << (8 * i); +#else + word |= static_cast(bytes[i]) << (8 * (num_bytes - 1 - i)); +#endif } return word; } @@ -48,16 +52,13 @@ inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { if (num_bytes == 8) { util::SafeStore(reinterpret_cast(bytes), value); } else { -#if ARROW_LITTLE_ENDIAN for (int i = 0; i < num_bytes; ++i) { +#if ARROW_LITTLE_ENDIAN bytes[i] = static_cast(value >> (8 * i)); - } #else - // Big-endian: most significant byte first - for (int i = 0; i < num_bytes; ++i) { bytes[i] = static_cast(value >> (8 * (num_bytes - 1 - i))); - } #endif + } } } @@ -103,6 +104,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, *num_indexes = 0; for (int i = 0; i < num_bits / unroll; ++i) { uint64_t word = util::SafeLoad(&reinterpret_cast(bits)[i]); +#if !ARROW_LITTLE_ENDIAN + word = ::arrow::bit_util::ByteSwap(word); +#endif if (bit_to_search == 0) { word = ~word; } @@ -118,21 +122,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, // Optionally process the last partial word with masking out bits outside range if (tail) { const uint8_t* bits_tail = bits + (num_bits - tail) / 8; -#if ARROW_LITTLE_ENDIAN uint64_t word = SafeLoadUpTo8Bytes(bits_tail, (tail + 7) / 8); -#else - int tail_bytes = (tail + 7) / 8; - uint64_t word; - if (tail_bytes == 8) { - word = util::SafeLoad(reinterpret_cast(bits_tail)); - } else { - // For bit manipulation, always load into least significant bits - // to ensure compatibility with CountTrailingZeros on Big-endian systems - word = 0; - for (int i = 0; i < tail_bytes; ++i) { - word |= static_cast(bits_tail[i]) << (8 * i); - } - } +#if !ARROW_LITTLE_ENDIAN + word = ::arrow::bit_util::ByteSwap(word); #endif if (bit_to_search == 0) { word = ~word; @@ -306,6 +298,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by constexpr int unroll = 8; for (int i = num_processed / unroll; i < num_bits / unroll; ++i) { uint64_t bytes_next = util::SafeLoad(&reinterpret_cast(bytes)[i]); +#if !ARROW_LITTLE_ENDIAN + bytes_next = ::arrow::bit_util::ByteSwap(bytes_next); +#endif bytes_next &= 0x0101010101010101ULL; bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes @@ -314,16 +309,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by } int tail = num_bits % unroll; if (tail) { - uint64_t bytes_next; -#if ARROW_LITTLE_ENDIAN - bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail); -#else - // On Big-endian systems, for bytes_to_bits, load all tail bytes in little-endian - // order to ensure compatibility with subsequent bit operations - bytes_next = 0; - for (int i = 0; i < tail; ++i) { - bytes_next |= static_cast((bytes + num_bits - tail)[i]) << (8 * i); - } + uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail); +#if !ARROW_LITTLE_ENDIAN + bytes_next = ::arrow::bit_util::ByteSwap(bytes_next); #endif bytes_next &= 0x0101010101010101ULL; bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes From d97c6820fccc895ae7b093f28e9706bda4182722 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Thu, 18 Jun 2026 22:18:22 -0700 Subject: [PATCH 4/4] Fix BE partial safe load/store --- cpp/src/arrow/compute/util.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index cd0b9faa9466..3e3114b111b5 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -40,7 +40,7 @@ inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { #if ARROW_LITTLE_ENDIAN word |= static_cast(bytes[i]) << (8 * i); #else - word |= static_cast(bytes[i]) << (8 * (num_bytes - 1 - i)); + word |= static_cast(bytes[i]) << (8 * (7 - i)); #endif } return word; @@ -56,7 +56,7 @@ inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { #if ARROW_LITTLE_ENDIAN bytes[i] = static_cast(value >> (8 * i)); #else - bytes[i] = static_cast(value >> (8 * (num_bytes - 1 - i))); + bytes[i] = static_cast(value >> (8 * (7 - i))); #endif } } @@ -250,6 +250,9 @@ void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bi unpacked |= (bits_next & 1); unpacked &= 0x0101010101010101ULL; unpacked *= 255; +#if !ARROW_LITTLE_ENDIAN + unpacked = ::arrow::bit_util::ByteSwap(unpacked); +#endif util::SafeStore(&reinterpret_cast(bytes)[i], unpacked); } int tail = num_bits % unroll; @@ -263,6 +266,9 @@ void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bi unpacked |= (bits_next & 1); unpacked &= 0x0101010101010101ULL; unpacked *= 255; +#if !ARROW_LITTLE_ENDIAN + unpacked = ::arrow::bit_util::ByteSwap(unpacked); +#endif SafeStoreUpTo8Bytes(bytes + num_bits - tail, tail, unpacked); } }