apache · raulcd · May 27, 2025 · May 27, 2025 · May 27, 2025 · May 28, 2025
@@ -83,9 +83,6 @@ cdef extern from "<Python.h>":
     void Py_XDECREF(PyObject* o)
     Py_ssize_t Py_REFCNT(PyObject* o)
 
-cdef extern from "numpy/halffloat.h":
-    ctypedef uint16_t npy_half
-
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
     # We can later add more of the common status factory methods as needed
     cdef CStatus CStatus_OK "arrow::Status::OK"()

@@ -1314,7 +1314,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         uint64_t value
 
     cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar):
-        npy_half value
+        uint16_t value  # raw half-float bit pattern, not a numeric value
 
     cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar):
         float value

@@ -55,7 +55,7 @@ cdef extern from "arrow/python/arrow_to_pandas.h" namespace "arrow::py::MapConve
 cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
     shared_ptr[CDataType] GetPrimitiveType(Type type)
 
-    object PyHalf_FromHalf(npy_half value)
+    object PyFloat_FromHalf(uint16_t value)
 
     cdef cppclass PyConversionOptions:
         PyConversionOptions()

@@ -388,7 +388,7 @@ cdef class HalfFloatScalar(Scalar):
             This parameter is ignored for non-nested Scalars.
         """
         cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
-        return PyHalf_FromHalf(sp.value) if sp.is_valid else None
+        return PyFloat_FromHalf(sp.value) if sp.is_valid else None
 
 
 cdef class FloatScalar(Scalar):

@@ -16,6 +16,7 @@
 // under the License.
 
 // helpers.h includes a NumPy header, so we include this first
+#include "arrow/python/numpy_init.h"
 #include "arrow/python/numpy_interop.h"
 
 #include "arrow/python/helpers.h"
@@ -31,6 +32,7 @@
 #include "arrow/type_fwd.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/config.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -73,21 +75,22 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
   }
 }
 
-PyObject* PyHalf_FromHalf(npy_half value) {
-  PyObject* result = PyArrayScalar_New(Half);
-  if (result != NULL) {
-    PyArrayScalar_ASSIGN(result, Half, value);
-  }
-  return result;
+PyObject* PyFloat_FromHalf(uint16_t value) {
+  // Decode the raw half-float bits with Arrow's Float16, then widen to double
+  // so the result can be handed to PyFloat_FromDouble.
+  return PyFloat_FromDouble(arrow::util::Float16::FromBits(value).ToDouble());
 }
 
-Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
-  if (PyArray_IsScalar(obj, Half)) {
-    *out = PyArrayScalar_VAL(obj, Half);
-    return Status::OK();
+Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
+  if (PyFloat_Check(obj)) {
+    // Narrow the double in a single step: going through float32 first would
+    // round twice and can pick the wrong neighbour for near-halfway values.
+    return arrow::util::Float16::FromDouble(PyFloat_AsDouble(obj)).bits();
+  } else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
+    return PyArrayScalar_VAL(obj, Half);
   } else {
-    // XXX: cannot use npy_double_to_half() without linking with Numpy
-    return Status::TypeError("Expected np.float16 instance");
+    return Status::TypeError("conversion to float16 expects a `float` or ",
+                             "`np.float16` object, got ", Py_TYPE(obj)->tp_name);
   }
 }
 

@@ -26,8 +26,6 @@
 
 #include "arrow/python/numpy_interop.h"
 
-#include <numpy/halffloat.h>
-
 #include "arrow/python/visibility.h"
 #include "arrow/type.h"
 #include "arrow/util/macros.h"
@@ -43,11 +41,11 @@ class OwnedRef;
 // \return A shared pointer to DataType
 ARROW_PYTHON_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
 
-// \brief Construct a np.float16 object from a npy_half value.
-ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value);
+// \brief Construct a Python float object from a half-float uint16_t value.
+ARROW_PYTHON_EXPORT PyObject* PyFloat_FromHalf(uint16_t value);
 
-// \brief Convert a Python object to a npy_half value.
-ARROW_PYTHON_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out);
+// \brief Convert a Python float or np.float16 object to a half-float uint16_t value.
+ARROW_PYTHON_EXPORT Result<uint16_t> PyFloat_AsHalf(PyObject* obj);
 
 namespace internal {
 

@@ -226,9 +226,16 @@ class PyValue {
   }
 
   static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
-    uint16_t value;
-    RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
-    return value;
+    // Objects passing PyFloatScalar_Check are delegated to PyFloat_AsHalf.
+    if (internal::PyFloatScalar_Check(obj)) return PyFloat_AsHalf(obj);
+    // Whatever is not an integer scalar either is reported as an invalid value.
+    if (!internal::PyIntScalar_Check(obj)) {
+      return internal::InvalidValue(obj, "tried to convert to float16");
+    }
+    // Integer scalars: obtain a float32 first, then narrow it to half precision.
+    float as_float{};
+    RETURN_NOT_OK(internal::IntegerScalarToFloat32Safe(obj, &as_float));
+    return arrow::util::Float16::FromFloat(as_float).bits();
   }
 
   static Result<float> Convert(const FloatType*, const O&, I obj) {

@@ -26,9 +26,8 @@
 
 #include "arrow/python/numpy_interop.h"
 
-#include <numpy/halffloat.h>
-
 #include "arrow/type_fwd.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -87,15 +86,18 @@ NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
 
 template <>
 struct npy_traits<NPY_FLOAT16> {
-  typedef npy_half value_type;
+  typedef uint16_t value_type;  // raw half-float bits, not a numeric value
   using TypeClass = HalfFloatType;
   using BuilderClass = HalfFloatBuilder;
 
-  static constexpr npy_half na_sentinel = NPY_HALF_NAN;
+  static constexpr uint16_t na_sentinel =
+      std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();
 
   static constexpr bool supports_nulls = true;
 
-  static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; }
+  static inline bool isnull(uint16_t v) {
+    return arrow::util::Float16::FromBits(v).is_nan();  // decode bits, then test
+  }
 };
 
 template <>
@@ -201,7 +203,8 @@ template <>
 struct arrow_traits<Type::HALF_FLOAT> {
   static constexpr int npy_type = NPY_FLOAT16;
   static constexpr bool supports_nulls = true;
-  static constexpr uint16_t na_value = NPY_HALF_NAN;
+  static constexpr uint16_t na_value =
+      std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();
   typedef typename npy_traits<NPY_FLOAT16>::value_type T;
 };
 

@@ -1664,6 +1664,16 @@ def test_floating_point_truncate_unsafe():
         _check_cast_case(case, safe=False)
 
 
+def test_half_float_array_from_python():
+    # GH-46611
+    arr = pa.array([1.0, 2.0, 3, None, 12345.6789, 1.234567], type=pa.float16())
+    assert arr.type == pa.float16()
+    assert arr.to_pylist() == [1.0, 2.0, 3.0, None, 12344.0, 1.234375]
+    msg1 = "Could not convert 'a' with type str: tried to convert to float16"
+    with pytest.raises(pa.ArrowInvalid, match=msg1):
+        arr = pa.array(['a', 3, None], type=pa.float16())
+
+
 def test_decimal_to_int_safe():
     safe_cases = [
         (
@@ -2281,10 +2291,11 @@ def test_array_conversions_no_sentinel_values():
 
     assert arr2.type == 'int8'
 
-    arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype='float32'),
-                    type='float32')
-    assert arr3.type == 'float32'
-    assert arr3.null_count == 0
+    for ty in ['float16', 'float32', 'float64']:
+        arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype=ty),
+                        type=ty)
+        assert arr3.type == ty
+        assert arr3.null_count == 0
 
 
 def test_time32_time64_from_integer():

@@ -1007,14 +1007,14 @@ def test_half_floats_from_numpy(self):
         arr = np.array([1.5, np.nan], dtype=np.float16)
         a = pa.array(arr, type=pa.float16())
         x, y = a.to_pylist()
-        assert isinstance(x, np.float16)
+        assert isinstance(x, float)
         assert x == 1.5
-        assert isinstance(y, np.float16)
+        assert isinstance(y, float)
         assert np.isnan(y)
 
         a = pa.array(arr, type=pa.float16(), from_pandas=True)
         x, y = a.to_pylist()
-        assert isinstance(x, np.float16)
+        assert isinstance(x, float)
         assert x == 1.5
         assert y is None
 

@@ -42,6 +42,7 @@
     (1, pa.uint32(), pa.UInt32Scalar),
     (1, pa.int64(), pa.Int64Scalar),
     (1, pa.uint64(), pa.UInt64Scalar),
+    (1.0, pa.float16(), pa.HalfFloatScalar),
     (1.0, None, pa.DoubleScalar),
     (1.0, pa.float32(), pa.FloatScalar),
     (decimal.Decimal("1.123"), None, pa.Decimal128Scalar),
@@ -238,15 +239,12 @@ def test_numerics():
     assert str(s) == "1.5"
     assert s.as_py() == 1.5
 
-    if np is not None:
-        # float16
-        s = pa.scalar(np.float16(0.5), type='float16')
-        assert isinstance(s, pa.HalfFloatScalar)
-        # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
-        # on numpy1 repr(np.float16(0.5)) == "0.5"
-        assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
-        assert str(s) == "0.5"
-        assert s.as_py() == 0.5
+    # float16
+    s = pa.scalar(0.5, type='float16')
+    assert isinstance(s, pa.HalfFloatScalar)
+    assert repr(s) == "<pyarrow.HalfFloatScalar: 0.5>"
+    assert str(s) == "0.5"
+    assert s.as_py() == 0.5
 
 
 def test_decimal128():