Skip to content

Commit

Permalink
Accept pyarrow LargeListType and FixedSizeListType (#458)
Browse files Browse the repository at this point in the history
* Accept pyarrow LargeListType and FixedSizeListType

* Combine the 3 register calls into one
  • Loading branch information
hussein-awala authored Feb 21, 2024
1 parent 0c7c21c commit 5ea0617
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,9 @@ def _(obj: pa.StructType, visitor: PyArrowSchemaVisitor[T]) -> T:


@visit_pyarrow.register(pa.ListType)
def _(obj: pa.ListType, visitor: PyArrowSchemaVisitor[T]) -> T:
@visit_pyarrow.register(pa.FixedSizeListType)
@visit_pyarrow.register(pa.LargeListType)
def _(obj: Union[pa.ListType, pa.LargeListType, pa.FixedSizeListType], visitor: PyArrowSchemaVisitor[T]) -> T:
visitor.before_list_element(obj.value_field)
result = visit_pyarrow(obj.value_type, visitor)
visitor.after_list_element(obj.value_field)
Expand Down
20 changes: 20 additions & 0 deletions tests/io/test_pyarrow_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,26 @@ def test_pyarrow_list_to_iceberg() -> None:
assert visit_pyarrow(pyarrow_list, _ConvertToIceberg()) == expected


def test_pyarrow_large_list_to_iceberg() -> None:
    """Visiting a pyarrow ``large_list`` with ``_ConvertToIceberg`` yields an Iceberg ``ListType``.

    The element field is non-nullable and tagged with Parquet field id "1", so
    the expected Iceberg list has a required integer element with id 1.
    """
    element_field = pa.field("element", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"})
    large_list_type = pa.large_list(element_field)

    converted = visit_pyarrow(large_list_type, _ConvertToIceberg())

    assert converted == ListType(element_id=1, element_type=IntegerType(), element_required=True)


def test_pyarrow_fixed_size_list_to_iceberg() -> None:
    """Visiting a fixed-size pyarrow list with ``_ConvertToIceberg`` yields an Iceberg ``ListType``.

    Passing a list size to ``pa.list_`` builds the fixed-size variant; the
    expected Iceberg list has a required integer element with id 1.
    """
    element_field = pa.field("element", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"})
    fixed_size_list_type = pa.list_(element_field, list_size=1)

    converted = visit_pyarrow(fixed_size_list_type, _ConvertToIceberg())

    assert converted == ListType(element_id=1, element_type=IntegerType(), element_required=True)


def test_pyarrow_map_to_iceberg() -> None:
pyarrow_map = pa.map_(
pa.field("key", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "1"}),
Expand Down

0 comments on commit 5ea0617

Please sign in to comment.