Skip to content

Commit

Permalink
Merge branch 'main' into object-storage-better-tests
Browse files — browse the repository at this point in the history
  • Loading branch information
Sreesh Maheshwar committed Jan 13, 2025
2 parents b2bf54b + c409678 commit d18c61a
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 27 deletions.
36 changes: 18 additions & 18 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ class TableProperties:
WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl"

OBJECT_STORE_ENABLED = "write.object-storage.enabled"
OBJECT_STORE_ENABLED_DEFAULT = False
OBJECT_STORE_ENABLED_DEFAULT = True

WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths"
WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ typing-extensions = "4.12.2"
pytest-mock = "3.14.0"
pyspark = "3.5.3"
cython = "3.0.11"
deptry = ">=0.14,<0.22"
deptry = ">=0.14,<0.23"
docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520

[tool.poetry.group.docs.dependencies]
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/test_writes/test_partitioned_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,13 @@ def test_object_storage_location_provider_excludes_partition_path(
PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col)
)

# write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default
assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT
assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT
tbl = _create_table(
session_catalog=session_catalog,
identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}",
# write.object-storage.partitioned-paths defaults to True
properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True},
properties={"format-version": str(format_version)},
data=[arrow_table_with_null],
partition_spec=partition_spec,
)
Expand Down
9 changes: 4 additions & 5 deletions tests/table/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[Partiti


# Checks that an unpartitioned data file is placed at "<table_location>/data/<file name>".
# NOTE(review): this is a rendered diff whose +/- markers and indentation were lost; the two
# consecutive `provider = ...` lines below appear to be the removed (EMPTY_DICT) and the added
# (explicit "write.object-storage.enabled": "false") versions of one statement — confirm against the commit.
def test_simple_location_provider_no_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})

# No partition key is passed, so the expected path has no partition segment.
assert provider.new_data_location("my_file") == "table_location/data/my_file"


# Checks that passing PARTITION_KEY inserts a "string_field=example_string" segment between
# "data" and the file name.
# NOTE(review): this is a rendered diff whose +/- markers and indentation were lost; the two
# consecutive `provider = ...` lines below appear to be the removed (EMPTY_DICT) and the added
# (explicit "write.object-storage.enabled": "false") versions of one statement — confirm against the commit.
def test_simple_location_provider_with_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "false"})

# PARTITION_KEY is defined outside this hunk; the expected path shows how it is rendered.
assert provider.new_data_location("my_file", PARTITION_KEY) == "table_location/data/string_field=example_string/my_file"

Expand All @@ -72,7 +72,7 @@ def test_custom_location_provider_not_found() -> None:


def test_object_storage_no_partition() -> None:
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)

location = provider.new_data_location("test.parquet")
parts = location.split("/")
Expand Down Expand Up @@ -109,7 +109,6 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
provider = load_location_provider(
table_location="table_location",
table_properties={
"write.object-storage.enabled": "true",
"write.object-storage.partitioned-paths": "false",
},
)
Expand All @@ -130,6 +129,6 @@ def test_object_storage_partitioned_paths_disabled(partition_key: Optional[Parti
],
)
def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)

assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}"

0 comments on commit d18c61a

Please sign in to comment.