NVIDIA
diff --git a/libcudacxx/include/cuda/std/__floating_point/format.h
Lines changed: 0 additions & 1 deletion b/libcudacxx/include/cuda/std/__floating_point/format.h
Lines changed: 0 additions & 1 deletion
diff --git a/libcudacxx/include/cuda/std/__floating_point/storage.h
Lines changed: 13 additions & 22 deletions b/libcudacxx/include/cuda/std/__floating_point/storage.h
Lines changed: 13 additions & 22 deletions
diff --git a/libcudacxx/include/cuda/std/__floating_point/traits.h
Lines changed: 0 additions & 1 deletion b/libcudacxx/include/cuda/std/__floating_point/traits.h
Lines changed: 0 additions & 1 deletion
diff --git a/libcudacxx/include/cuda/std/__limits/numeric_limits.h
Lines changed: 11 additions & 2 deletions b/libcudacxx/include/cuda/std/__limits/numeric_limits.h
Lines changed: 11 additions & 2 deletions
@@ -21,7 +21,6 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__floating_point/cuda_fp_types.h>
 #include <cuda/std/__fwd/fp.h>
 #include <cuda/std/__type_traits/is_same.h>
 #include <cuda/std/cfloat>
 
@@ -22,7 +22,6 @@
 #endif // no system header
 
 #include <cuda/std/__bit/bit_cast.h>
-#include <cuda/std/__floating_point/cuda_fp_types.h>
 #include <cuda/std/__floating_point/format.h>
 #include <cuda/std/__floating_point/traits.h>
 #include <cuda/std/__type_traits/always_false.h>
@@ -72,19 +71,11 @@ using __fp_storage_t = decltype(__fp_storage_type_impl<_Fmt>());
 template <class _Tp>
 using __fp_storage_of_t = __fp_storage_t<__fp_format_of_v<_Tp>>;
 
-#if _CCCL_HAS_NVFP16()
-struct __cccl_nvfp16_manip_helper : __half
-{
-  using __half::__x;
-};
-#endif // _CCCL_HAS_NVFP16()
-
-#if _CCCL_HAS_NVBF16()
-struct __cccl_nvbf16_manip_helper : __nv_bfloat16
+template <class _Tp>
+struct __cccl_nvfp_manip_helper : _Tp
 {
-  using __nv_bfloat16::__x;
+  using _Tp::__x;
 };
-#endif // _CCCL_HAS_NVBF16()
 
 template <class _Tp>
 [[nodiscard]] _CCCL_API constexpr _Tp __fp_from_storage(__fp_storage_of_t<_Tp> __v) noexcept
@@ -102,39 +93,39 @@ template <class _Tp>
 #if _CCCL_HAS_NVFP16()
   else if constexpr (is_same_v<_Tp, __half>)
   {
-    __cccl_nvfp16_manip_helper __helper{};
+    __cccl_nvfp_manip_helper<_Tp> __helper{};
     __helper.__x = __v;
     return __helper;
   }
 #endif // _CCCL_HAS_NVFP16()
 #if _CCCL_HAS_NVBF16()
   else if constexpr (is_same_v<_Tp, __nv_bfloat16>)
   {
-    __cccl_nvbf16_manip_helper __helper{};
+    __cccl_nvfp_manip_helper<_Tp> __helper{};
     __helper.__x = __v;
     return __helper;
   }
 #endif // _CCCL_HAS_NVBF16()
 #if _CCCL_HAS_NVFP8_E4M3()
   else if constexpr (is_same_v<_Tp, __nv_fp8_e4m3>)
   {
-    __nv_fp8_e4m3 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
 #endif // _CCCL_HAS_NVFP8_E4M3()
 #if _CCCL_HAS_NVFP8_E5M2()
   else if constexpr (is_same_v<_Tp, __nv_fp8_e5m2>)
   {
-    __nv_fp8_e5m2 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
 #endif // _CCCL_HAS_NVFP8_E5M2()
 #if _CCCL_HAS_NVFP8_E8M0()
   else if constexpr (is_same_v<_Tp, __nv_fp8_e8m0>)
   {
-    __nv_fp8_e8m0 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
@@ -143,7 +134,7 @@ template <class _Tp>
   else if constexpr (is_same_v<_Tp, __nv_fp6_e2m3>)
   {
     _CCCL_ASSERT((__v & 0xc0u) == 0u, "Invalid __nv_fp6_e2m3 storage value");
-    __nv_fp6_e2m3 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
@@ -152,7 +143,7 @@ template <class _Tp>
   else if constexpr (is_same_v<_Tp, __nv_fp6_e3m2>)
   {
     _CCCL_ASSERT((__v & 0xc0u) == 0u, "Invalid __nv_fp6_e3m2 storage value");
-    __nv_fp6_e3m2 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
@@ -161,7 +152,7 @@ template <class _Tp>
   else if constexpr (is_same_v<_Tp, __nv_fp4_e2m1>)
   {
     _CCCL_ASSERT((__v & 0xf0u) == 0u, "Invalid __nv_fp4_e2m1 storage value");
-    __nv_fp4_e2m1 __ret{};
+    _Tp __ret{};
     __ret.__x = __v;
     return __ret;
   }
@@ -190,13 +181,13 @@ template <class _Tp>
 #if _CCCL_HAS_NVFP16()
   else if constexpr (is_same_v<_Tp, __half>)
   {
-    return __cccl_nvfp16_manip_helper{__v}.__x;
+    return __cccl_nvfp_manip_helper<_Tp>{__v}.__x;
   }
 #endif // _CCCL_HAS_NVFP16()
 #if _CCCL_HAS_NVBF16()
   else if constexpr (is_same_v<_Tp, __nv_bfloat16>)
   {
-    return __cccl_nvbf16_manip_helper{__v}.__x;
+    return __cccl_nvfp_manip_helper<_Tp>{__v}.__x;
   }
 #endif // _CCCL_HAS_NVBF16()
 #if _CCCL_HAS_NVFP8_E4M3()
 
@@ -21,7 +21,6 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__floating_point/cuda_fp_types.h>
 #include <cuda/std/__floating_point/properties.h>
 #include <cuda/std/__fwd/fp.h>
 
 
@@ -58,7 +58,7 @@ enum class __numeric_limits_type
 };
 
 template <class _Tp>
-_CCCL_API constexpr __numeric_limits_type __make_numeric_limits_type()
+[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL __numeric_limits_type __make_numeric_limits_type() noexcept
 {
   if constexpr (is_same_v<_Tp, bool>)
   {
@@ -78,7 +78,16 @@ _CCCL_API constexpr __numeric_limits_type __make_numeric_limits_type()
   }
 }
 
-template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>()>
+// To avoid including the nvfp headers, we add a type parameter _Up that defaults to _Tp. This keeps the
+// specialization a template, so it is not instantiated unless the numeric_limits<_Tp> class is instantiated. The
+// specialization should look like:
+//
+// template <class _Tp>
+// class __numeric_limits_impl<__nvfp_type, __numeric_limits_type::__floating_point, _Tp>
+// { ... };
+//
+// and _Tp should be used everywhere inside it instead of __nvfp_type.
+template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>(), class _Up = _Tp>
 class __numeric_limits_impl
 {
 public:
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ enum class __numeric_limits_type`
`58`	`58`	`};`
`59`	`59`
`60`	`60`	`template <class _Tp>`
`61`		`-_CCCL_API constexpr __numeric_limits_type __make_numeric_limits_type()`
	`61`	`+[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL __numeric_limits_type __make_numeric_limits_type() noexcept`
`62`	`62`	`{`
`63`	`63`	`if constexpr (is_same_v<_Tp, bool>)`
`64`	`64`	`{`
`@@ -78,7 +78,16 @@ _CCCL_API constexpr __numeric_limits_type __make_numeric_limits_type()`
`78`	`78`	`}`
`79`	`79`	`}`
`80`	`80`
`81`		`-template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>()>`
	`81`	`+// To avoid including the nvfp headers, we add a type parameter _Up that defaults to _Tp. This keeps the`
	`82`	`+// specialization a template, so it is not instantiated unless the numeric_limits<_Tp> class is instantiated. The`
	`83`	`+// specialization should look like:`
	`84`	`+//`
	`85`	`+// template <class _Tp>`
	`86`	`+// class __numeric_limits_impl<__nvfp_type, __numeric_limits_type::__floating_point, _Tp>`
	`87`	`+// { ... };`
	`88`	`+//`
	`89`	`+// and _Tp should be used everywhere inside it instead of __nvfp_type.`
	`90`	`+template <class _Tp, __numeric_limits_type = __make_numeric_limits_type<_Tp>(), class _Up = _Tp>`
`82`	`91`	`class __numeric_limits_impl`
`83`	`92`	`{`
`84`	`93`	`public:`