diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index a99da416374ad..f97f2ff1c4732 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -83,9 +83,6 @@ cdef extern from "": void Py_XDECREF(PyObject* o) Py_ssize_t Py_REFCNT(PyObject* o) -cdef extern from "numpy/halffloat.h": - ctypedef uint16_t npy_half - cdef extern from "arrow/api.h" namespace "arrow" nogil: # We can later add more of the common status factory methods as needed cdef CStatus CStatus_OK "arrow::Status::OK"() diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 2ccadbc6e4683..b7a55c6219bf2 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1314,7 +1314,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: uint64_t value cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar): - npy_half value + uint16_t value cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar): float value diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index cd4474f8345df..e544aa0165d86 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -55,7 +55,7 @@ cdef extern from "arrow/python/arrow_to_pandas.h" namespace "arrow::py::MapConve cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: shared_ptr[CDataType] GetPrimitiveType(Type type) - object PyHalf_FromHalf(npy_half value) + object PyFloat_FromHalf(uint16_t value) cdef cppclass PyConversionOptions: PyConversionOptions() diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 4f13df2efdbfe..a9cdcff4e4195 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -388,7 +388,7 @@ cdef class HalfFloatScalar(Scalar): This parameter is ignored for non-nested Scalars. """ cdef CHalfFloatScalar* sp = self.wrapped.get() - return PyHalf_FromHalf(sp.value) if sp.is_valid else None + return PyFloat_FromHalf(sp.value) if sp.is_valid else None cdef class FloatScalar(Scalar): diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc index 46014a25c2dee..0a24b259310e5 100644 --- a/python/pyarrow/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/arrow/python/helpers.cc @@ -16,6 +16,7 @@ // under the License. // helpers.h includes a NumPy header, so we include this first +#include "arrow/python/numpy_init.h" #include "arrow/python/numpy_interop.h" #include "arrow/python/helpers.h" @@ -31,6 +32,7 @@ #include "arrow/type_fwd.h" #include "arrow/util/checked_cast.h" #include "arrow/util/config.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" namespace arrow { @@ -73,21 +75,22 @@ std::shared_ptr GetPrimitiveType(Type::type type) { } } -PyObject* PyHalf_FromHalf(npy_half value) { - PyObject* result = PyArrayScalar_New(Half); - if (result != NULL) { - PyArrayScalar_ASSIGN(result, Half, value); - } - return result; +PyObject* PyFloat_FromHalf(uint16_t value) { + // Convert the uint16_t Float16 value to a PyFloat object + arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value); + return PyFloat_FromDouble(half_val.ToDouble()); } -Status PyFloat_AsHalf(PyObject* obj, npy_half* out) { - if (PyArray_IsScalar(obj, Half)) { - *out = PyArrayScalar_VAL(obj, Half); - return Status::OK(); +Result PyFloat_AsHalf(PyObject* obj) { + if (PyFloat_Check(obj)) { + arrow::util::Float16 half_val = + arrow::util::Float16::FromDouble(PyFloat_AsDouble(obj)); + return half_val.bits(); + } else if (has_numpy() && PyArray_IsScalar(obj, Half)) { + return PyArrayScalar_VAL(obj, Half); } else { - // XXX: cannot use npy_double_to_half() without linking with Numpy - return Status::TypeError("Expected np.float16 instance"); + return Status::TypeError("conversion to float16 expects a `float` or ", + "`np.float16` object, got ", Py_TYPE(obj)->tp_name); } } diff --git a/python/pyarrow/src/arrow/python/helpers.h b/python/pyarrow/src/arrow/python/helpers.h index e2fd8212ae68d..b0cf1010289ea 100644 --- a/python/pyarrow/src/arrow/python/helpers.h +++ b/python/pyarrow/src/arrow/python/helpers.h @@ -26,8 +26,6 @@ #include "arrow/python/numpy_interop.h" -#include - #include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" @@ -43,11 +41,11 @@ class OwnedRef; // \return A shared pointer to DataType ARROW_PYTHON_EXPORT std::shared_ptr GetPrimitiveType(Type::type type); -// \brief Construct a np.float16 object from a npy_half value. -ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value); +// \brief Construct a Python float object from a half-float uint16_t value. +ARROW_PYTHON_EXPORT PyObject* PyFloat_FromHalf(uint16_t value); -// \brief Convert a Python object to a npy_half value. -ARROW_PYTHON_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out); +// \brief Convert a Python object to a half-float uint16_t value. +ARROW_PYTHON_EXPORT Result PyFloat_AsHalf(PyObject* obj); namespace internal { diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index b49b77cb6d66c..139eb1d7f4ffe 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -226,9 +226,16 @@ class PyValue { } static Result Convert(const HalfFloatType*, const O&, I obj) { - uint16_t value; - RETURN_NOT_OK(PyFloat_AsHalf(obj, &value)); - return value; + if (internal::PyFloatScalar_Check(obj)) { + return PyFloat_AsHalf(obj); + } else if (internal::PyIntScalar_Check(obj)) { + double float_val{}; + RETURN_NOT_OK(internal::IntegerScalarToDoubleSafe(obj, &float_val)); + const auto half_val = arrow::util::Float16::FromDouble(float_val); + return half_val.bits(); + } else { + return internal::InvalidValue(obj, "tried to convert to float16"); + } } static Result Convert(const FloatType*, const O&, I obj) { diff --git a/python/pyarrow/src/arrow/python/type_traits.h b/python/pyarrow/src/arrow/python/type_traits.h index a941577f76558..865e1af427671 100644 --- a/python/pyarrow/src/arrow/python/type_traits.h +++ b/python/pyarrow/src/arrow/python/type_traits.h @@ -26,9 +26,8 @@ #include "arrow/python/numpy_interop.h" -#include - #include "arrow/type_fwd.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" namespace arrow { @@ -87,15 +86,18 @@ NPY_INT_DECL(ULONGLONG, UInt64, uint64_t); template <> struct npy_traits { - typedef npy_half value_type; + typedef uint16_t value_type; using TypeClass = HalfFloatType; using BuilderClass = HalfFloatBuilder; - static constexpr npy_half na_sentinel = NPY_HALF_NAN; + static constexpr uint16_t na_sentinel = + std::numeric_limits::quiet_NaN().bits(); static constexpr bool supports_nulls = true; - static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; } + static inline bool isnull(uint16_t v) { + return arrow::util::Float16::FromBits(v).is_nan(); + } }; template <> @@ -201,7 +203,8 @@ template <> struct arrow_traits { static constexpr int npy_type = NPY_FLOAT16; static constexpr bool supports_nulls = true; - static constexpr uint16_t na_value = NPY_HALF_NAN; + static constexpr uint16_t na_value = + std::numeric_limits::quiet_NaN().bits(); typedef typename npy_traits::value_type T; }; diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 8a76512cb51f1..1a964cda6c0b5 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1664,6 +1664,16 @@ def test_floating_point_truncate_unsafe(): _check_cast_case(case, safe=False) +def test_half_float_array_from_python(): + # GH-46611 + arr = pa.array([1.0, 2.0, 3, None, 12345.6789, 1.234567], type=pa.float16()) + assert arr.type == pa.float16() + assert arr.to_pylist() == [1.0, 2.0, 3.0, None, 12344.0, 1.234375] + msg1 = "Could not convert 'a' with type str: tried to convert to float16" + with pytest.raises(pa.ArrowInvalid, match=msg1): + arr = pa.array(['a', 3, None], type=pa.float16()) + + def test_decimal_to_int_safe(): safe_cases = [ ( @@ -2281,10 +2291,11 @@ def test_array_conversions_no_sentinel_values(): assert arr2.type == 'int8' - arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype='float32'), - type='float32') - assert arr3.type == 'float32' - assert arr3.null_count == 0 + for ty in ['float16', 'float32', 'float64']: + arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype=ty), + type=ty) + assert arr3.type == ty + assert arr3.null_count == 0 def test_time32_time64_from_integer(): diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 9498c0a3deb5e..ceea2527da053 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -1007,14 +1007,14 @@ def test_half_floats_from_numpy(self): arr = np.array([1.5, np.nan], dtype=np.float16) a = pa.array(arr, type=pa.float16()) x, y = a.to_pylist() - assert isinstance(x, np.float16) + assert isinstance(x, float) assert x == 1.5 - assert isinstance(y, np.float16) + assert isinstance(y, float) assert np.isnan(y) a = pa.array(arr, type=pa.float16(), from_pandas=True) x, y = a.to_pylist() - assert isinstance(x, np.float16) + assert isinstance(x, float) assert x == 1.5 assert y is None diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 6ef25b82f8e33..14f6ccef626e1 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -42,6 +42,7 @@ (1, pa.uint32(), pa.UInt32Scalar), (1, pa.int64(), pa.Int64Scalar), (1, pa.uint64(), pa.UInt64Scalar), + (1.0, pa.float16(), pa.HalfFloatScalar), (1.0, None, pa.DoubleScalar), (1.0, pa.float32(), pa.FloatScalar), (decimal.Decimal("1.123"), None, pa.Decimal128Scalar), @@ -238,15 +239,12 @@ def test_numerics(): assert str(s) == "1.5" assert s.as_py() == 1.5 - if np is not None: - # float16 - s = pa.scalar(np.float16(0.5), type='float16') - assert isinstance(s, pa.HalfFloatScalar) - # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)" - # on numpy1 repr(np.float16(0.5)) == "0.5" - assert repr(s) == f"" - assert str(s) == "0.5" - assert s.as_py() == 0.5 + # float16 + s = pa.scalar(0.5, type='float16') + assert isinstance(s, pa.HalfFloatScalar) + assert repr(s) == "" + assert str(s) == "0.5" + assert s.as_py() == 0.5 def test_decimal128():