Skip to content

Commit

Permalink
add test for writing special character column name
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinjqliu committed Apr 11, 2024
1 parent d7b5147 commit d278ee5
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
19 changes: 19 additions & 0 deletions tests/integration/test_writes/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,25 @@ def get_current_snapshot_id(identifier: str) -> int:
assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore


@pytest.mark.integration
def test_python_writes_special_character_column_with_spark_reads(spark: SparkSession, session_catalog: Catalog) -> None:
    """Round-trip a column whose name contains a special character ('/').

    Writes the table with pyiceberg, then asserts that Spark and pyiceberg
    read back identical data for the awkwardly-named column.
    """
    identifier = "default.python_writes_special_character_column_with_spark_reads"
    special_column = "letter/abc"

    # Single string column named with a '/', holding a null in the middle
    # to exercise null handling alongside the special-character name.
    arrow_schema = pa.schema([(special_column, pa.string())])
    arrow_table = pa.Table.from_pydict(
        {special_column: ['a', None, 'z']},
        schema=arrow_schema,
    )

    # Fresh v1 table created with the special-character schema, then overwritten.
    tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, schema=arrow_schema)
    tbl.overwrite(arrow_table)

    # Both engines must agree on the written contents.
    spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
    pyiceberg_df = tbl.scan().to_pandas()
    assert spark_df.equals(pyiceberg_df)


@pytest.mark.integration
def test_write_bin_pack_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
identifier = "default.write_bin_pack_data_files"
Expand Down
7 changes: 4 additions & 3 deletions tests/integration/test_writes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
# pylint:disable=redefined-outer-name
from typing import List, Optional
from typing import List, Optional, Union

import pyarrow as pa

Expand Down Expand Up @@ -65,6 +65,7 @@ def _create_table(
properties: Properties,
data: Optional[List[pa.Table]] = None,
partition_spec: Optional[PartitionSpec] = None,
schema: Union[Schema, "pa.Schema"] = TABLE_SCHEMA,
) -> Table:
try:
session_catalog.drop_table(identifier=identifier)
Expand All @@ -73,10 +74,10 @@ def _create_table(

if partition_spec:
tbl = session_catalog.create_table(
identifier=identifier, schema=TABLE_SCHEMA, properties=properties, partition_spec=partition_spec
identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec
)
else:
tbl = session_catalog.create_table(identifier=identifier, schema=TABLE_SCHEMA, properties=properties)
tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties)

if data:
for d in data:
Expand Down

0 comments on commit d278ee5

Please sign in to comment.