Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pyarrow_bindings #405

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ jobs:
run: |
python -m pip install numpy scipy
- name: Install pyarrow
if: ${{ !startsWith(matrix.python, 'pypy') && !contains(matrix.python, 'alpha') }}
run: |
python -m pip install pyarrow
- name: Configure
run: >
cmake -S . -B build -DNB_TEST_STABLE_ABI=ON -DNB_TEST_SHARED_BUILD="$(python3 -c 'import sys; print(int(sys.version_info.minor>=11))')"
Expand Down
94 changes: 94 additions & 0 deletions cmake/FindPyArrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# This module runs the Python interpreter, so the caller must already have
# located Python (Python_FOUND / Python_EXECUTABLE) before including it.
if(NOT Python_FOUND)
  message(FATAL_ERROR "Could not find python. Make sure you called 'find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)'")
endif()

message(STATUS "Using python ${Python_EXECUTABLE}")

# Ask pyarrow to create its library symlinks. The exit status and stderr are
# captured so a failure can be reported instead of aborting the configure.
execute_process(
  COMMAND "${Python_EXECUTABLE}" "-c" "import pyarrow as pa; pa.create_library_symlinks();"
  ERROR_VARIABLE _PYARROW_ERROR_VALUE
  RESULT_VARIABLE _PYARROW_CREATE_SYMLINKS_SUCCESS)

# Default: rely on the plain library names. Only when the symlink step failed
# do we fall back to probing versioned library file names later on.
set(PYARROW_USE_KNOWN_VERSIONS FALSE)
if(_PYARROW_CREATE_SYMLINKS_SUCCESS AND NOT _PYARROW_CREATE_SYMLINKS_SUCCESS EQUAL 0)
  message(WARNING "FAILED pyarrow.create_library_symlinks(): ${_PYARROW_CREATE_SYMLINKS_SUCCESS}\n${_PYARROW_ERROR_VALUE}")
  message(STATUS "Falling back to try using known versions for arrow library. You may have to set Arrow_ADDITIONAL_VERSIONS for newer versions.")
  set(PYARROW_USE_KNOWN_VERSIONS TRUE)
endif()

# Query the header directory that pyarrow reports via pa.get_include().
execute_process(
  COMMAND "${Python_EXECUTABLE}" "-c" "import pyarrow as pa; print(pa.get_include());"
  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_VARIABLE _PYARROW_ERROR_VALUE
  RESULT_VARIABLE _PYARROW_SEARCH_SUCCESS)

# A non-zero result is only logged; configuration continues.
if(_PYARROW_SEARCH_SUCCESS AND NOT _PYARROW_SEARCH_SUCCESS EQUAL 0)
  message(STATUS "FAILED: ${_PYARROW_SEARCH_SUCCESS}\n${_PYARROW_ERROR_VALUE}")
endif()

# The arrow headers are taken from the same directory as the pyarrow headers.
set(ARROW_INCLUDE_DIR ${PYARROW_INCLUDE_DIR})

# Query the library directories; the raw output is the printed Python list
# returned by pa.get_library_dirs() and is parsed further below.
execute_process(
  COMMAND "${Python_EXECUTABLE}" "-c" "import pyarrow as pa; print(pa.get_library_dirs());"
  OUTPUT_VARIABLE _PYARROW_VALUES_OUTPUT
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_VARIABLE _PYARROW_ERROR_VALUE
  RESULT_VARIABLE _PYARROW_SEARCH_SUCCESS)

# As above, a failure is reported but not treated as fatal here.
if(_PYARROW_SEARCH_SUCCESS AND NOT _PYARROW_SEARCH_SUCCESS EQUAL 0)
  message(STATUS "FAILED: ${_PYARROW_SEARCH_SUCCESS}\n${_PYARROW_ERROR_VALUE}")
endif()

# Convert the printed Python list (e.g. "['/a/lib', '/b/lib']") into a CMake
# list and pick its first entry as the library search path.
#
# Every expansion is quoted on purpose:
#  * an unquoted, empty value would drop the <input> argument and make
#    string() abort with an argument-count error;
#  * an unquoted value that already contains ';' is split into several
#    <input> arguments, which string() concatenates again, silently gluing
#    all directories into one bogus path.
# Brackets and quotes are stripped first, then ", " separators become ';'
# (the optional blanks after the comma are consumed so that later entries do
# not start with a space).
string(REPLACE "[" "" _PYARROW_VALUES "${_PYARROW_VALUES_OUTPUT}")
string(REPLACE "]" "" _PYARROW_VALUES "${_PYARROW_VALUES}")
string(REPLACE "'" "" _PYARROW_VALUES "${_PYARROW_VALUES}")
string(REGEX REPLACE ", *" ";" _PYARROW_VALUES "${_PYARROW_VALUES}")

if(NOT "${_PYARROW_VALUES}" STREQUAL "")
  list(GET _PYARROW_VALUES 0 ARROW_SEARCH_LIB_PATH)
else()
  # pyarrow reported no library directory (for example because the import
  # failed). Leave the search path empty so the library lookups below fail
  # cleanly and the package is reported as not found.
  set(ARROW_SEARCH_LIB_PATH "")
endif()

message(STATUS "include: ${PYARROW_INCLUDE_DIR} lib: ${ARROW_SEARCH_LIB_PATH}")

# Candidate library names. The plain names are always tried first; versioned
# file names are appended only when the symlink step failed earlier.
set(_arrow_TEST_VERSIONS arrow)
set(_pyarrow_TEST_VERSIONS arrow_python)

if(PYARROW_USE_KNOWN_VERSIONS)
  # User-supplied versions (Arrow_ADDITIONAL_VERSIONS) take precedence over
  # the built-in list.
  set(_Arrow_KNOWN_VERSIONS
    ${Arrow_ADDITIONAL_VERSIONS}
    1800 1700 1600 1500 1400 1300 1200 1100 1000 900 800)

  foreach(_arrow_so_version IN LISTS _Arrow_KNOWN_VERSIONS)
    list(APPEND _arrow_TEST_VERSIONS "libarrow.so.${_arrow_so_version}")
    list(APPEND _pyarrow_TEST_VERSIONS "libarrow_python.so.${_arrow_so_version}")
  endforeach()
endif()

# Look only inside the directory reported by pyarrow (NO_DEFAULT_PATH), so a
# system-wide Arrow installation is never picked up by accident.
find_library(ARROW_LIB
  NAMES ${_arrow_TEST_VERSIONS}
  PATHS ${ARROW_SEARCH_LIB_PATH}
  NO_DEFAULT_PATH)
message(STATUS "Found ${ARROW_LIB} in ${ARROW_SEARCH_LIB_PATH}")

find_library(ARROW_PYTHON_LIB
  NAMES ${_pyarrow_TEST_VERSIONS}
  PATHS ${ARROW_SEARCH_LIB_PATH}
  NO_DEFAULT_PATH)
message(STATUS "Found ${ARROW_PYTHON_LIB} in ${ARROW_SEARCH_LIB_PATH}")

# find_package_handle_standard_args() lives in this standard module; include
# it explicitly instead of relying on another find module having loaded it.
include(FindPackageHandleStandardArgs)

# Sets PyArrow_FOUND and emits the usual found / not-found diagnostics
# (a fatal error when the package was requested as REQUIRED).
find_package_handle_standard_args(PyArrow REQUIRED_VARS PYARROW_INCLUDE_DIR ARROW_LIB ARROW_PYTHON_LIB)

# Only define imported targets when the lookup succeeded; otherwise they would
# point at "<var>-NOTFOUND" locations and break consumers at link time.
if(PyArrow_FOUND)
  get_filename_component(ARROW_SONAME "${ARROW_LIB}" NAME)
  get_filename_component(PYARROW_SONAME "${ARROW_PYTHON_LIB}" NAME)

  # The TARGET guards make repeated find_package(PyArrow) calls harmless;
  # add_library() would fail on an already existing target name.
  if(NOT TARGET arrow::arrow)
    add_library(arrow::arrow SHARED IMPORTED)
    set_target_properties(arrow::arrow PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${ARROW_INCLUDE_DIR}"
      #INTERFACE_LINK_LIBRARIES "dl"
      IMPORTED_LOCATION "${ARROW_LIB}"
      IMPORTED_SONAME "${ARROW_SONAME}"
    )
  endif()

  if(NOT TARGET pyarrow::pyarrow)
    add_library(pyarrow::pyarrow SHARED IMPORTED)
    set_target_properties(pyarrow::pyarrow PROPERTIES
      IMPORTED_LOCATION "${ARROW_PYTHON_LIB}"
      IMPORTED_SONAME "${PYARROW_SONAME}")
  endif()

  # Convenience target bundling everything an extension needs to link:
  # the arrow library, the arrow_python library and the Python module target.
  if(NOT TARGET nanobind::pyarrow)
    add_library(nanobind::pyarrow INTERFACE IMPORTED)
    set_property(TARGET nanobind::pyarrow PROPERTY
      INTERFACE_LINK_LIBRARIES arrow::arrow pyarrow::pyarrow Python::Module)
  endif()
endif()
# set_property(TARGET nanobind::pyarrow APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS _GLIBCXX_USE_CXX11_ABI=0)
16 changes: 15 additions & 1 deletion cmake/nanobind-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,21 @@ function (nanobind_build_library TARGET_NAME)
${NB_DIR}/include/nanobind/stl/vector.h
${NB_DIR}/include/nanobind/eigen/dense.h
${NB_DIR}/include/nanobind/eigen/sparse.h

${NB_DIR}/include/nanobind/pyarrow/detail/array_caster.h
${NB_DIR}/include/nanobind/pyarrow/detail/caster.h
${NB_DIR}/include/nanobind/pyarrow/array_binary.h
${NB_DIR}/include/nanobind/pyarrow/array_nested.h
${NB_DIR}/include/nanobind/pyarrow/array_primitive.h
${NB_DIR}/include/nanobind/pyarrow/buffer.h
${NB_DIR}/include/nanobind/pyarrow/chunked_array.h
${NB_DIR}/include/nanobind/pyarrow/datatype.h
${NB_DIR}/include/nanobind/pyarrow/pyarrow_import.h
${NB_DIR}/include/nanobind/pyarrow/record_batch.h
${NB_DIR}/include/nanobind/pyarrow/scalar.h
${NB_DIR}/include/nanobind/pyarrow/sparse_tensor.h
${NB_DIR}/include/nanobind/pyarrow/table.h
${NB_DIR}/include/nanobind/pyarrow/tensor.h
${NB_DIR}/include/nanobind/pyarrow/type.h
${NB_DIR}/src/buffer.h
${NB_DIR}/src/hash.h
${NB_DIR}/src/nb_internals.h
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ The nanobind logo was designed by `AndoTwin Studio
classes
exceptions
ndarray_index
pyarrow
packaging
utilities

Expand Down
76 changes: 76 additions & 0 deletions docs/pyarrow.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
.. _pyarrow:

PyArrow Bindings
================

nanobind can exchange ``pyarrow`` objects via a ``std::shared_ptr<..>``. To get started, you have to include

.. code-block:: cpp
#include <nanobind/pyarrow/pyarrow_import.h>
and make sure to call the following `pyarrow initialization <https://arrow.apache.org/docs/python/integration/extending.html#_CPPv4N5arrow14import_pyarrowEv>`__ at the top of your module definition

.. code-block:: cpp
NB_MODULE(test_pyarrow_ext, m) {
static nanobind::detail::pyarrow::ImportPyarrow module;
// ...
}
The type caster headers are structured similarly to the headers in ``pyarrow`` itself (``array_primitive.h``, ``array_binary.h``, etc.):

.. list-table::
:widths: 42 48
:header-rows: 1

* - Types
- Type caster header
* - ``Array``, ``DoubleArray``, ``Int64Array``, ...
- ``#include <nanobind/pyarrow/array_primitive.h>``
* - ``BinaryArray``, ``LargeBinaryArray``, ``StringArray``, ``LargeStringArray``, ``FixedSizeBinaryArray``
- ``#include <nanobind/pyarrow/array_binary.h>``
* - ``ListArray``, ``LargeListArray``, ``MapArray``, ``FixedSizeListArray``, ``StructArray``, ``UnionArray``, ``SparseUnionArray``, ``DenseUnionArray``
- ``#include <nanobind/pyarrow/array_nested.h>``
* - ``ChunkedArray``
- ``#include <nanobind/pyarrow/chunked_array.h>``
* - ``Table``
- ``#include <nanobind/pyarrow/table.h>``
* - ``RecordBatch``
- ``#include <nanobind/pyarrow/record_batch.h>``
* - ``Field``, ``Schema``
- ``#include <nanobind/pyarrow/type.h>``
* - ``Scalars``
- ``#include <nanobind/pyarrow/scalar.h>``
* - ``DataTypes``
- ``#include <nanobind/pyarrow/datatype.h>``
* - ``Buffer``, ``ResizableBuffer``, ``MutableBuffer``
- ``#include <nanobind/pyarrow/buffer.h>``
* - ``Tensor``, ``NumericTensor<..>``
- ``#include <nanobind/pyarrow/tensor.h>``
* - ``SparseCOOTensor``, ``SparseCSCMatrix``, ``SparseCSFTensor``, ``SparseCSRMatrix``
- ``#include <nanobind/pyarrow/sparse_tensor.h>``

**Example**: The following code snippet shows how to create bindings for a ``pyarrow.DoubleArray``:

.. code-block:: cpp
#include <memory>
#include <nanobind/nanobind.h>
#include <nanobind/pyarrow/pyarrow_import.h>
#include <nanobind/pyarrow/array_primitive.h>
namespace nb = nanobind;
NB_MODULE(test_pyarrow_ext, m) {
static nb::detail::pyarrow::ImportPyarrow module;
m.def("my_pyarrow_function", [](std::shared_ptr<arrow::DoubleArray> arr) {
auto data = arr->data()->Copy();
return std::make_shared<arrow::DoubleArray>(std::move(data));
}
);
}
If you want to consume the ``C++`` artifacts distributed with the ``pyarrow`` package from ``PyPI`` in your own ``CMake``
project, please have a look at `FindPyArrow.cmake <https://github.com/wjakob/nanobind/blob/master/cmake/FindPyArrow.cmake>`__.
37 changes: 37 additions & 0 deletions include/nanobind/pyarrow/array_binary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
nanobind/pyarrow/array_binary.h: conversion between arrow and pyarrow

Copyright (c) 2024 Maximilian Kleinert <[email protected]> and
Wenzel Jakob <[email protected]>

All rights reserved. Use of this source code is governed by a
BSD-style license that can be found in the LICENSE file.
*/
#pragma once

#include <nanobind/nanobind.h>
#include <memory>
#include <arrow/array/array_binary.h>
#include <nanobind/pyarrow/detail/array_caster.h>

NAMESPACE_BEGIN(NB_NAMESPACE)
NAMESPACE_BEGIN(detail)

// Registers the nanobind casters for one arrow binary-array class `ArrayT`.
// Each use emits two explicit specializations:
//   1. pyarrow::pyarrow_caster_name_trait<arrow::ArrayT>, whose `Name` is
//      built from NB_STRINGIFY(ArrayT);
//   2. type_caster<std::shared_ptr<arrow::ArrayT>>, which derives from
//      pyarrow::pyarrow_array_caster<arrow::ArrayT>.
// The macro is local to this header and is undefined again below.
#define NB_REGISTER_PYARROW_BINARY_ARRAY(ArrayT)                        \
    template <>                                                         \
    struct pyarrow::pyarrow_caster_name_trait<arrow::ArrayT> {          \
        static constexpr auto Name = const_name(NB_STRINGIFY(ArrayT));  \
    };                                                                  \
    template <>                                                         \
    struct type_caster<std::shared_ptr<arrow::ArrayT>>                  \
        : pyarrow::pyarrow_array_caster<arrow::ArrayT> {};

// Variable-length binary / string arrays (32- and 64-bit variants)
NB_REGISTER_PYARROW_BINARY_ARRAY(BinaryArray)
NB_REGISTER_PYARROW_BINARY_ARRAY(LargeBinaryArray)
NB_REGISTER_PYARROW_BINARY_ARRAY(StringArray)
NB_REGISTER_PYARROW_BINARY_ARRAY(LargeStringArray)
// Fixed-width binary array
NB_REGISTER_PYARROW_BINARY_ARRAY(FixedSizeBinaryArray)

#undef NB_REGISTER_PYARROW_BINARY_ARRAY


NAMESPACE_END(detail)
NAMESPACE_END(NB_NAMESPACE)
42 changes: 42 additions & 0 deletions include/nanobind/pyarrow/array_nested.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
nanobind/pyarrow/array_nested.h: conversion between arrow and pyarrow

Copyright (c) 2024 Maximilian Kleinert <[email protected]> and
Wenzel Jakob <[email protected]>

All rights reserved. Use of this source code is governed by a
BSD-style license that can be found in the LICENSE file.
*/
#pragma once

#include <nanobind/nanobind.h>
#include <memory>
#include <nanobind/pyarrow/detail/array_caster.h>

#include <arrow/array/array_nested.h>

NAMESPACE_BEGIN(NB_NAMESPACE)
NAMESPACE_BEGIN(detail)

#define NB_REGISTER_PYARROW_NESTED_ARRAY(name) \
template<> \
struct pyarrow::pyarrow_caster_name_trait<arrow::name> { \
static constexpr auto Name = const_name(NB_STRINGIFY(name)); \
}; \
template<> \
struct type_caster<std::shared_ptr<arrow::name>> : pyarrow::pyarrow_array_caster<arrow::name> {};

// array_nested classes
NB_REGISTER_PYARROW_NESTED_ARRAY(ListArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(LargeListArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(MapArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(FixedSizeListArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(StructArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(UnionArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(SparseUnionArray)
NB_REGISTER_PYARROW_NESTED_ARRAY(DenseUnionArray)

#undef NB_REGISTER_PYARROW_NESTED_ARRAY

NAMESPACE_END(detail)
NAMESPACE_END(NB_NAMESPACE)
73 changes: 73 additions & 0 deletions include/nanobind/pyarrow/array_primitive.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
nanobind/pyarrow/array_primitive.h: conversion between arrow and pyarrow

Copyright (c) 2024 Maximilian Kleinert <[email protected]> and
Wenzel Jakob <[email protected]>

All rights reserved. Use of this source code is governed by a
BSD-style license that can be found in the LICENSE file.
*/
#pragma once

#include <nanobind/nanobind.h>
#include <memory>
#include <arrow/array/array_primitive.h>
#include <nanobind/pyarrow/detail/array_caster.h>

NAMESPACE_BEGIN(NB_NAMESPACE)
NAMESPACE_BEGIN(detail)

// Registers the nanobind casters for one arrow array class `ArrayT`.
// Each use emits two explicit specializations:
//   1. pyarrow::pyarrow_caster_name_trait<arrow::ArrayT>, whose `Name` is
//      built from NB_STRINGIFY(ArrayT);
//   2. type_caster<std::shared_ptr<arrow::ArrayT>>, which derives from
//      pyarrow::pyarrow_array_caster<arrow::ArrayT>.
// The macro is local to this header and is undefined again below.
//
// NOTE(review): this header only includes <arrow/array/array_primitive.h>;
// presumably every class registered below (decimal, extension and
// run-end-encoded arrays included) is at least declared through it --
// confirm against the supported Arrow versions.
#define NB_REGISTER_PYARROW_ARRAY(ArrayT)                               \
    template <>                                                         \
    struct pyarrow::pyarrow_caster_name_trait<arrow::ArrayT> {          \
        static constexpr auto Name = const_name(NB_STRINGIFY(ArrayT));  \
    };                                                                  \
    template <>                                                         \
    struct type_caster<std::shared_ptr<arrow::ArrayT>>                  \
        : pyarrow::pyarrow_array_caster<arrow::ArrayT> {};

// Base classes
NB_REGISTER_PYARROW_ARRAY(Array)
NB_REGISTER_PYARROW_ARRAY(FlatArray)
NB_REGISTER_PYARROW_ARRAY(PrimitiveArray)
NB_REGISTER_PYARROW_ARRAY(NullArray)

// Boolean and interval arrays
NB_REGISTER_PYARROW_ARRAY(BooleanArray)
NB_REGISTER_PYARROW_ARRAY(DayTimeIntervalArray)
NB_REGISTER_PYARROW_ARRAY(MonthDayNanoIntervalArray)

// Floating-point arrays
NB_REGISTER_PYARROW_ARRAY(HalfFloatArray)
NB_REGISTER_PYARROW_ARRAY(FloatArray)
NB_REGISTER_PYARROW_ARRAY(DoubleArray)

// Signed integer arrays
NB_REGISTER_PYARROW_ARRAY(Int8Array)
NB_REGISTER_PYARROW_ARRAY(Int16Array)
NB_REGISTER_PYARROW_ARRAY(Int32Array)
NB_REGISTER_PYARROW_ARRAY(Int64Array)

// Unsigned integer arrays
NB_REGISTER_PYARROW_ARRAY(UInt8Array)
NB_REGISTER_PYARROW_ARRAY(UInt16Array)
NB_REGISTER_PYARROW_ARRAY(UInt32Array)
NB_REGISTER_PYARROW_ARRAY(UInt64Array)

// Decimal arrays
NB_REGISTER_PYARROW_ARRAY(Decimal128Array)
NB_REGISTER_PYARROW_ARRAY(Decimal256Array)

// Date arrays
NB_REGISTER_PYARROW_ARRAY(Date32Array)
NB_REGISTER_PYARROW_ARRAY(Date64Array)

// Time, month-interval and duration arrays
NB_REGISTER_PYARROW_ARRAY(Time32Array)
NB_REGISTER_PYARROW_ARRAY(Time64Array)
NB_REGISTER_PYARROW_ARRAY(MonthIntervalArray)
NB_REGISTER_PYARROW_ARRAY(DurationArray)

// Extension array
NB_REGISTER_PYARROW_ARRAY(ExtensionArray)
// Run-end encoded array
NB_REGISTER_PYARROW_ARRAY(RunEndEncodedArray)

#undef NB_REGISTER_PYARROW_ARRAY

NAMESPACE_END(detail)
NAMESPACE_END(NB_NAMESPACE)
Loading
Loading