Skip to content

Commit 29e94e7

Browse files
committed
Set field-id when needed (#1867)
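Fixes #1798

<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual GitHub issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->

# Rationale for this change

When an optional field is absent from the data being written, the null-filled placeholder column in `struct` was always built with `include_field_ids=False`. It now follows the visitor's own `_include_field_ids` setting, so the field-id is set when needed.

# Are these changes tested?

Yes. This commit adds the integration test `test_write_optional_list`, which appends to a table with an optional list column, first with the column present and then with it omitted.

# Are there any user-facing changes?

<!-- In the case of user-facing changes, please add the changelog label. -->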
1 parent cfb4f25 commit 29e94e7

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

pyiceberg/io/pyarrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1750,7 +1750,7 @@ def struct(
17501750
field_arrays.append(array)
17511751
fields.append(self._construct_field(field, array.type))
17521752
elif field.optional:
1753-
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False)
1753+
arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)
17541754
field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
17551755
fields.append(self._construct_field(field, arrow_type))
17561756
else:

tests/integration/test_writes/test_writes.py

+36
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
DateType,
5252
DoubleType,
5353
IntegerType,
54+
ListType,
5455
LongType,
5556
NestedField,
5657
StringType,
@@ -1639,3 +1640,38 @@ def test_abort_table_transaction_on_exception(
16391640

16401641
# Validate the transaction is aborted and no partial update is applied
16411642
assert len(tbl.scan().to_pandas()) == table_size # type: ignore
1643+
1644+
1645+
@pytest.mark.integration
1646+
def test_write_optional_list(session_catalog: Catalog) -> None:
1647+
identifier = "default.test_write_optional_list"
1648+
schema = Schema(
1649+
NestedField(field_id=1, name="name", field_type=StringType(), required=False),
1650+
NestedField(
1651+
field_id=3,
1652+
name="my_list",
1653+
field_type=ListType(element_id=45, element=StringType(), element_required=False),
1654+
required=False,
1655+
),
1656+
)
1657+
session_catalog.create_table_if_not_exists(identifier, schema)
1658+
1659+
df_1 = pa.Table.from_pylist(
1660+
[
1661+
{"name": "one", "my_list": ["test"]},
1662+
{"name": "another", "my_list": ["test"]},
1663+
]
1664+
)
1665+
session_catalog.load_table(identifier).append(df_1)
1666+
1667+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 2
1668+
1669+
df_2 = pa.Table.from_pylist(
1670+
[
1671+
{"name": "one"},
1672+
{"name": "another"},
1673+
]
1674+
)
1675+
session_catalog.load_table(identifier).append(df_2)
1676+
1677+
assert len(session_catalog.load_table(identifier).scan().to_arrow()) == 4

0 commit comments

Comments
 (0)