Skip to content

GH-46611: [Python][C++] Allow building float16 arrays without numpy #46618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions python/pyarrow/includes/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ cdef extern from "<Python.h>":
void Py_XDECREF(PyObject* o)
Py_ssize_t Py_REFCNT(PyObject* o)

cdef extern from "numpy/halffloat.h":
ctypedef uint16_t npy_half

cdef extern from "arrow/api.h" namespace "arrow" nogil:
# We can later add more of the common status factory methods as needed
cdef CStatus CStatus_OK "arrow::Status::OK"()
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
uint64_t value

cdef cppclass CHalfFloatScalar" arrow::HalfFloatScalar"(CScalar):
npy_half value
uint16_t value

cdef cppclass CFloatScalar" arrow::FloatScalar"(CScalar):
float value
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ cdef extern from "arrow/python/arrow_to_pandas.h" namespace "arrow::py::MapConve
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
shared_ptr[CDataType] GetPrimitiveType(Type type)

object PyHalf_FromHalf(npy_half value)
object PyFloat_FromHalf(uint16_t value)

cdef cppclass PyConversionOptions:
PyConversionOptions()
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/scalar.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ cdef class HalfFloatScalar(Scalar):
This parameter is ignored for non-nested Scalars.
"""
cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
return PyHalf_FromHalf(sp.value) if sp.is_valid else None
return PyFloat_FromHalf(sp.value) if sp.is_valid else None


cdef class FloatScalar(Scalar):
Expand Down
27 changes: 15 additions & 12 deletions python/pyarrow/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

// helpers.h includes a NumPy header, so we include this first
#include "arrow/python/numpy_init.h"
#include "arrow/python/numpy_interop.h"

#include "arrow/python/helpers.h"
Expand All @@ -31,6 +32,7 @@
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/config.h"
#include "arrow/util/float16.h"
#include "arrow/util/logging.h"

namespace arrow {
Expand Down Expand Up @@ -73,21 +75,22 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
}
}

PyObject* PyHalf_FromHalf(npy_half value) {
PyObject* result = PyArrayScalar_New(Half);
if (result != NULL) {
PyArrayScalar_ASSIGN(result, Half, value);
}
return result;
PyObject* PyFloat_FromHalf(uint16_t value) {
// Convert the uint16_t Float16 value to a PyFloat object
arrow::util::Float16 half_val = arrow::util::Float16::FromBits(value);
return PyFloat_FromDouble(half_val.ToDouble());
}

Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
if (PyArray_IsScalar(obj, Half)) {
*out = PyArrayScalar_VAL(obj, Half);
return Status::OK();
Result<uint16_t> PyFloat_AsHalf(PyObject* obj) {
if (PyFloat_Check(obj)) {
float float_val = static_cast<float>(PyFloat_AsDouble(obj));
arrow::util::Float16 half_val = arrow::util::Float16::FromFloat(float_val);
return half_val.bits();
} else if (has_numpy() && PyArray_IsScalar(obj, Half)) {
return PyArrayScalar_VAL(obj, Half);
} else {
// XXX: cannot use npy_double_to_half() without linking with Numpy
return Status::TypeError("Expected np.float16 instance");
return Status::TypeError("conversion to float16 expects a `float` or ",
"`np.float16` object, got ", Py_TYPE(obj)->tp_name);
}
}

Expand Down
10 changes: 4 additions & 6 deletions python/pyarrow/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@

#include "arrow/python/numpy_interop.h"

#include <numpy/halffloat.h>

#include "arrow/python/visibility.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
Expand All @@ -43,11 +41,11 @@ class OwnedRef;
// \return A shared pointer to DataType
ARROW_PYTHON_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);

// \brief Construct a np.float16 object from a npy_half value.
ARROW_PYTHON_EXPORT PyObject* PyHalf_FromHalf(npy_half value);
// \brief Construct a Python float object from a half-float uint16_t value.
ARROW_PYTHON_EXPORT PyObject* PyFloat_FromHalf(uint16_t value);

// \brief Convert a Python object to a npy_half value.
ARROW_PYTHON_EXPORT Status PyFloat_AsHalf(PyObject* obj, npy_half* out);
// \brief Convert a Python object to a half-float uint16_t value.
ARROW_PYTHON_EXPORT Result<uint16_t> PyFloat_AsHalf(PyObject* obj);

namespace internal {

Expand Down
13 changes: 10 additions & 3 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,16 @@ class PyValue {
}

static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
uint16_t value;
RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
return value;
if (internal::PyFloatScalar_Check(obj)) {
return PyFloat_AsHalf(obj);
} else if (internal::PyIntScalar_Check(obj)) {
float float_val{};
RETURN_NOT_OK(internal::IntegerScalarToFloat32Safe(obj, &float_val));
const auto half_val = arrow::util::Float16::FromFloat(float_val);
return half_val.bits();
} else {
return internal::InvalidValue(obj, "tried to convert to float16");
}
}

static Result<float> Convert(const FloatType*, const O&, I obj) {
Expand Down
15 changes: 9 additions & 6 deletions python/pyarrow/src/arrow/python/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@

#include "arrow/python/numpy_interop.h"

#include <numpy/halffloat.h>

#include "arrow/type_fwd.h"
#include "arrow/util/float16.h"
#include "arrow/util/logging.h"

namespace arrow {
Expand Down Expand Up @@ -87,15 +86,18 @@ NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);

template <>
struct npy_traits<NPY_FLOAT16> {
typedef npy_half value_type;
typedef uint16_t value_type;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this could also be arrow::util::Float16, if that's easy to do.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires quite a lot of changes around pyarrow/src/arrow/python/python_to_arrow.cc and pyarrow/src/arrow/python/arrow_to_pandas.cc, and I haven't been able to make it work yet. I would prefer to explore updating it in a separate issue.

using TypeClass = HalfFloatType;
using BuilderClass = HalfFloatBuilder;

static constexpr npy_half na_sentinel = NPY_HALF_NAN;
static constexpr uint16_t na_sentinel =
std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();

static constexpr bool supports_nulls = true;

static inline bool isnull(npy_half v) { return v == NPY_HALF_NAN; }
static inline bool isnull(uint16_t v) {
return arrow::util::Float16::FromBits(v).is_nan();
}
};

template <>
Expand Down Expand Up @@ -201,7 +203,8 @@ template <>
struct arrow_traits<Type::HALF_FLOAT> {
static constexpr int npy_type = NPY_FLOAT16;
static constexpr bool supports_nulls = true;
static constexpr uint16_t na_value = NPY_HALF_NAN;
static constexpr uint16_t na_value =
std::numeric_limits<arrow::util::Float16>::quiet_NaN().bits();
typedef typename npy_traits<NPY_FLOAT16>::value_type T;
};

Expand Down
19 changes: 15 additions & 4 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,16 @@ def test_floating_point_truncate_unsafe():
_check_cast_case(case, safe=False)


def test_half_float_array_from_python():
    # GH-46611: float16 arrays can be built from plain Python floats and
    # ints. The expected list below shows the inputs after they have been
    # stored as float16 (e.g. 12345.6789 reads back as 12344.0).
    arr = pa.array(
        [1.0, 2.0, 3, None, 12345.6789, 1.234567], type=pa.float16())
    assert arr.type == pa.float16()
    assert arr.to_pylist() == [1.0, 2.0, 3.0, None, 12344.0, 1.234375]

    # A non-numeric element must be rejected with ArrowInvalid; the result of
    # the call is irrelevant, so it is not bound to a name.
    msg1 = "Could not convert 'a' with type str: tried to convert to float16"
    with pytest.raises(pa.ArrowInvalid, match=msg1):
        pa.array(['a', 3, None], type=pa.float16())


def test_decimal_to_int_safe():
safe_cases = [
(
Expand Down Expand Up @@ -2281,10 +2291,11 @@ def test_array_conversions_no_sentinel_values():

assert arr2.type == 'int8'

arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype='float32'),
type='float32')
assert arr3.type == 'float32'
assert arr3.null_count == 0
for ty in ['float16', 'float32', 'float64']:
arr3 = pa.array(np.array([1, np.nan, 2, 3, np.nan, 4], dtype=ty),
type=ty)
assert arr3.type == ty
assert arr3.null_count == 0


def test_time32_time64_from_integer():
Expand Down
6 changes: 3 additions & 3 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,14 +1007,14 @@ def test_half_floats_from_numpy(self):
arr = np.array([1.5, np.nan], dtype=np.float16)
a = pa.array(arr, type=pa.float16())
x, y = a.to_pylist()
assert isinstance(x, np.float16)
assert isinstance(x, float)
assert x == 1.5
assert isinstance(y, np.float16)
assert isinstance(y, float)
assert np.isnan(y)

a = pa.array(arr, type=pa.float16(), from_pandas=True)
x, y = a.to_pylist()
assert isinstance(x, np.float16)
assert isinstance(x, float)
assert x == 1.5
assert y is None

Expand Down
16 changes: 7 additions & 9 deletions python/pyarrow/tests/test_scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
(1, pa.uint32(), pa.UInt32Scalar),
(1, pa.int64(), pa.Int64Scalar),
(1, pa.uint64(), pa.UInt64Scalar),
(1.0, pa.float16(), pa.HalfFloatScalar),
(1.0, None, pa.DoubleScalar),
(1.0, pa.float32(), pa.FloatScalar),
(decimal.Decimal("1.123"), None, pa.Decimal128Scalar),
Expand Down Expand Up @@ -238,15 +239,12 @@ def test_numerics():
assert str(s) == "1.5"
assert s.as_py() == 1.5

if np is not None:
# float16
s = pa.scalar(np.float16(0.5), type='float16')
assert isinstance(s, pa.HalfFloatScalar)
# on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
# on numpy1 repr(np.float16(0.5)) == "0.5"
assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
assert str(s) == "0.5"
assert s.as_py() == 0.5
# float16
s = pa.scalar(0.5, type='float16')
assert isinstance(s, pa.HalfFloatScalar)
assert repr(s) == "<pyarrow.HalfFloatScalar: 0.5>"
assert str(s) == "0.5"
assert s.as_py() == 0.5


def test_decimal128():
Expand Down
Loading