|
| 1 | +From 4635e7834d872aedc71e33a4a7784bb3de4515eb Mon Sep 17 00:00:00 2001 |
| 2 | +From: Mika Laitio <[email protected]>
| 3 | +Date: Fri, 31 May 2024 18:35:12 -0700 |
| 4 | +Subject: [PATCH 8/8] replace clamp with min and max for fedora 40 issue |
| 5 | + |
| 6 | +Fedora 40/gcc 14 throws the following error for std::clamp
| 7 | +usage during the PyTorch build.
| 8 | + |
| 9 | +In file included from /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/algorithm:61: |
| 10 | +/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_algo.h:3625:7: error: reference to host function '__glibcxx_assert_fail' in host device function |
| 11 | +3625 | __glibcxx_assert(!(__hi < __lo)); |
| 12 | +| ^ |
| 13 | +/usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/x86_64-redhat-linux/bits/c++config.h:2453:7: note: expanded from macro '__glibcxx_assert' |
| 14 | +2453 | std::__glibcxx_assert_fail(); |
| 15 | +| ^ |
| 16 | +pytorch/aten/src/ATen/native/hip/IndexKernel.hip:254:21: note: called by 'operator()' |
| 17 | +254 | qvalue = std::clamp(qvalue, qmin, qmax); |
| 18 | + |
| 19 | +https://github.com/pytorch/pytorch/issues/127666 |
| 20 | +https://github.com/lamikr/rocm_sdk_builder/issues/12 |
| 21 | + |
| 22 | +Signed-off-by: Mika Laitio <[email protected]>
| 23 | +--- |
| 24 | + aten/src/ATen/native/cuda/IndexKernel.cu | 4 +++- |
| 25 | + 1 file changed, 3 insertions(+), 1 deletion(-) |
| 26 | + |
| 27 | +diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu |
| 28 | +index 5682ba27573..862bcb9614d 100644 |
| 29 | +--- a/aten/src/ATen/native/cuda/IndexKernel.cu |
| 30 | ++++ b/aten/src/ATen/native/cuda/IndexKernel.cu |
| 31 | +@@ -249,7 +249,9 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind |
| 32 | + |
| 33 | + gpu_index_kernel(iter, index_size, index_stride, [inv_scale, zero_point, qmin, qmax]C10_DEVICE(char* const out_data, const char* const in_data, const int64_t offset) { |
| 34 | + int64_t qvalue = static_cast<int64_t>(zero_point + nearbyintf(*(float*)in_data * inv_scale)); |
| 35 | +- qvalue = std::clamp(qvalue, qmin, qmax); |
| 36 | ++ int64_t new_max = std::max<int64_t>(qmin, qvalue); |
| 37 | ++ qvalue = std::min<int64_t>(qmax, new_max); |
| 38 | ++ //qvalue = std::clamp(qvalue, qmin, qmax); |
| 39 | + *(scalar_t*)(out_data + offset) = static_cast<scalar_t>(qvalue); |
| 40 | + }); |
| 41 | + }); |
| 42 | +-- |
| 43 | +2.45.1 |
| 44 | + |
0 commit comments