Commit 6c0d307

Use 'strtobool' instead of comparing with a string. (#988)
* Use 'strtobool' instead of comparing with a string.
* Move the PropertyUtil methods to the properties module as functions
* fixup! Use 'strtobool' instead of comparing with a string.
* fixup! Use 'strtobool' instead of comparing with a string.
1 parent dafcf22 commit 6c0d307
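
The helper at the center of this change: the diffs below import `strtobool` from `pyiceberg.types`. For reference, a minimal sketch of such a helper, assuming it mirrors the semantics of the deprecated `distutils.util.strtobool` while returning a real `bool`:

```python
def strtobool(val: str) -> bool:
    """Convert a string representation of truth to True or False.

    True values are 'y', 'yes', 't', 'true', 'on', and '1';
    false values are 'n', 'no', 'f', 'false', 'off', and '0'.
    Raises ValueError for anything else.
    """
    normalized = val.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"Invalid truth value: {val!r}")
```

Compared with the `value.lower() == "true"` pattern it replaces, this accepts the common truthy spellings and raises on invalid input instead of silently returning False.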

12 files changed: +242 -98 lines changed


pyiceberg/catalog/dynamodb.py
Lines changed: 6 additions & 7 deletions

```diff
@@ -61,6 +61,7 @@
 from pyiceberg.table.metadata import new_table_metadata
 from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
 from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties
+from pyiceberg.utils.properties import get_first_property_value
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -95,19 +96,17 @@ class DynamoDbCatalog(MetastoreCatalog):
     def __init__(self, name: str, **properties: str):
         super().__init__(name, **properties)
 
-        from pyiceberg.table import PropertyUtil
-
         session = boto3.Session(
-            profile_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
-            region_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION),
+            profile_name=get_first_property_value(properties, DYNAMODB_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
+            region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION),
             botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
-            aws_access_key_id=PropertyUtil.get_first_property_value(
+            aws_access_key_id=get_first_property_value(
                 properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
             ),
-            aws_secret_access_key=PropertyUtil.get_first_property_value(
+            aws_secret_access_key=get_first_property_value(
                 properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
             ),
-            aws_session_token=PropertyUtil.get_first_property_value(
+            aws_session_token=get_first_property_value(
                 properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
             ),
         )
```
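
The call sites above drop the `PropertyUtil` namespace for a module-level function. A plausible sketch of `get_first_property_value` in `pyiceberg/utils/properties.py`, assuming it returns the first property that is set, so that current keys win over deprecated fallbacks:

```python
from typing import Any, Optional

from pyiceberg.typedef import Properties


def get_first_property_value(properties: Properties, *property_names: str) -> Optional[Any]:
    """Return the value of the first property name that is set, else None."""
    for property_name in property_names:
        if value := properties.get(property_name):
            return value
    return None
```

So `get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION)` prefers the catalog-specific key, then the generic AWS key, then the deprecated one.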

pyiceberg/catalog/glue.py
Lines changed: 7 additions & 9 deletions

```diff
@@ -71,7 +71,6 @@
 from pyiceberg.table import (
     CommitTableRequest,
     CommitTableResponse,
-    PropertyUtil,
     Table,
 )
 from pyiceberg.table.metadata import TableMetadata
@@ -98,6 +97,7 @@
     TimeType,
     UUIDType,
 )
+from pyiceberg.utils.properties import get_first_property_value, property_as_bool
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -298,19 +298,17 @@ class GlueCatalog(MetastoreCatalog):
     def __init__(self, name: str, **properties: Any):
         super().__init__(name, **properties)
 
-        from pyiceberg.table import PropertyUtil
-
         session = boto3.Session(
-            profile_name=PropertyUtil.get_first_property_value(properties, GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
-            region_name=PropertyUtil.get_first_property_value(properties, GLUE_REGION, AWS_REGION, DEPRECATED_REGION),
+            profile_name=get_first_property_value(properties, GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
+            region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION, DEPRECATED_REGION),
             botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
-            aws_access_key_id=PropertyUtil.get_first_property_value(
+            aws_access_key_id=get_first_property_value(
                 properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
             ),
-            aws_secret_access_key=PropertyUtil.get_first_property_value(
+            aws_secret_access_key=get_first_property_value(
                 properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
             ),
-            aws_session_token=PropertyUtil.get_first_property_value(
+            aws_session_token=get_first_property_value(
                 properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
             ),
         )
@@ -368,7 +366,7 @@ def _update_glue_table(self, database_name: str, table_name: str, table_input: T
             self.glue.update_table(
                 DatabaseName=database_name,
                 TableInput=table_input,
-                SkipArchive=PropertyUtil.property_as_bool(self.properties, GLUE_SKIP_ARCHIVE, GLUE_SKIP_ARCHIVE_DEFAULT),
+                SkipArchive=property_as_bool(self.properties, GLUE_SKIP_ARCHIVE, GLUE_SKIP_ARCHIVE_DEFAULT),
                 VersionId=version_id,
             )
         except self.glue.exceptions.EntityNotFoundException as e:
```
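
`property_as_bool`, used for `SkipArchive` above, combines the lookup with `strtobool` and a default. A sketch under the same assumptions as the `strtobool` sketch earlier:

```python
from pyiceberg.typedef import Properties
from pyiceberg.types import strtobool


def property_as_bool(properties: Properties, property_name: str, default: bool) -> bool:
    """Parse a boolean property with strtobool, falling back to a default when unset."""
    if value := properties.get(property_name):
        return strtobool(value)
    return default
```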

pyiceberg/catalog/hive.py
Lines changed: 5 additions & 9 deletions

```diff
@@ -81,7 +81,6 @@
 from pyiceberg.table import (
     CommitTableRequest,
     CommitTableResponse,
-    PropertyUtil,
     StagedTable,
     Table,
     TableProperties,
@@ -109,6 +108,7 @@
     TimeType,
     UUIDType,
 )
+from pyiceberg.utils.properties import property_as_bool, property_as_float
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -259,13 +259,9 @@ def __init__(self, name: str, **properties: str):
         super().__init__(name, **properties)
         self._client = _HiveClient(properties["uri"], properties.get("ugi"))
 
-        self._lock_check_min_wait_time = PropertyUtil.property_as_float(
-            properties, LOCK_CHECK_MIN_WAIT_TIME, DEFAULT_LOCK_CHECK_MIN_WAIT_TIME
-        )
-        self._lock_check_max_wait_time = PropertyUtil.property_as_float(
-            properties, LOCK_CHECK_MAX_WAIT_TIME, DEFAULT_LOCK_CHECK_MAX_WAIT_TIME
-        )
-        self._lock_check_retries = PropertyUtil.property_as_float(
+        self._lock_check_min_wait_time = property_as_float(properties, LOCK_CHECK_MIN_WAIT_TIME, DEFAULT_LOCK_CHECK_MIN_WAIT_TIME)
+        self._lock_check_max_wait_time = property_as_float(properties, LOCK_CHECK_MAX_WAIT_TIME, DEFAULT_LOCK_CHECK_MAX_WAIT_TIME)
+        self._lock_check_retries = property_as_float(
             properties,
             LOCK_CHECK_RETRIES,
             DEFAULT_LOCK_CHECK_RETRIES,
@@ -314,7 +310,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
             sd=_construct_hive_storage_descriptor(
                 table.schema(),
                 table.location(),
-                PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
+                property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
             ),
             tableType=EXTERNAL_TABLE,
             parameters=_construct_parameters(table.metadata_location),
```
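
`property_as_float` follows the same pattern for numeric properties, which is what collapses the Hive lock-check settings above onto single lines. A sketch (the exact signature is an assumption):

```python
from typing import Optional

from pyiceberg.typedef import Properties


def property_as_float(properties: Properties, property_name: str, default: Optional[float] = None) -> Optional[float]:
    """Parse a float property, falling back to a default when unset."""
    if value := properties.get(property_name):
        return float(value)
    return default
```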

pyiceberg/catalog/rest.py
Lines changed: 2 additions & 1 deletion

```diff
@@ -71,6 +71,7 @@
 from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids
 from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel, Identifier, Properties
 from pyiceberg.types import transform_dict_value_to_str
+from pyiceberg.utils.properties import property_as_bool
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -257,7 +258,7 @@ def _create_session(self) -> Session:
         self._config_headers(session)
 
         # Configure SigV4 Request Signing
-        if str(self.properties.get(SIGV4, False)).lower() == "true":
+        if property_as_bool(self.properties, SIGV4, False):
             self._init_sigv4(session)
 
         return session
```
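
This is more than a cosmetic change. The old check recognized only the literal string "true", while a strtobool-based check (semantics assumed as in the sketch above) accepts the usual truthy spellings and fails loudly on garbage:

```python
from pyiceberg.types import strtobool

# Old pattern: only the exact word "true" (case-insensitively) enabled SigV4.
# ("sigv4" is a hypothetical key here; the real property name is the SIGV4 constant.)
str({"sigv4": "1"}.get("sigv4", False)).lower() == "true"  # False, silently

# New pattern, assuming distutils-style strtobool semantics:
strtobool("1")        # True
strtobool("garbage")  # raises ValueError instead of silently disabling signing
```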

pyiceberg/conversions.py
Lines changed: 2 additions & 1 deletion

```diff
@@ -57,6 +57,7 @@
     TimestamptzType,
     TimeType,
     UUIDType,
+    strtobool,
 )
 from pyiceberg.utils.datetime import date_to_days, datetime_to_micros, time_to_micros
 from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal
@@ -99,7 +100,7 @@ def partition_to_py(primitive_type: PrimitiveType, value_str: str) -> Union[int,
 @partition_to_py.register(BooleanType)
 @handle_none
 def _(primitive_type: BooleanType, value_str: str) -> Union[int, float, str, uuid.UUID]:
-    return value_str.lower() == "true"
+    return strtobool(value_str)
 
 
 @partition_to_py.register(IntegerType)
```
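
The effect on partition value parsing, as a quick illustration (the behavior for "1" and for invalid strings assumes the strtobool semantics sketched earlier):

```python
from pyiceberg.conversions import partition_to_py
from pyiceberg.types import BooleanType

partition_to_py(BooleanType(), "TRUE")   # True, as before
partition_to_py(BooleanType(), "1")      # True; the old `== "true"` check returned False
partition_to_py(BooleanType(), "bogus")  # raises ValueError instead of returning False
```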

pyiceberg/expressions/parser.py
Lines changed: 2 additions & 1 deletion

```diff
@@ -63,6 +63,7 @@
     StringLiteral,
 )
 from pyiceberg.typedef import L
+from pyiceberg.types import strtobool
 
 ParserElement.enablePackrat()
 
@@ -96,7 +97,7 @@ def _(result: ParseResults) -> Reference:
 
 @boolean.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    if "true" == result.boolean.lower():
+    if strtobool(result.boolean):
         return AlwaysTrue()
     else:
         return AlwaysFalse()
```
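
Boolean literals in row-filter expressions get the same stricter treatment; a hedged usage sketch, assuming the module's `parse` entry point:

```python
from pyiceberg.expressions.parser import parse

parse("true")   # AlwaysTrue()
parse("false")  # AlwaysFalse()
```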

pyiceberg/io/fsspec.py
Lines changed: 7 additions & 8 deletions

```diff
@@ -76,6 +76,7 @@
     OutputStream,
 )
 from pyiceberg.typedef import Properties
+from pyiceberg.utils.properties import get_first_property_value, property_as_bool
 
 logger = logging.getLogger(__name__)
 
@@ -118,14 +119,12 @@ def _file(_: Properties) -> LocalFileSystem:
 def _s3(properties: Properties) -> AbstractFileSystem:
     from s3fs import S3FileSystem
 
-    from pyiceberg.table import PropertyUtil
-
     client_kwargs = {
         "endpoint_url": properties.get(S3_ENDPOINT),
-        "aws_access_key_id": PropertyUtil.get_first_property_value(properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
-        "aws_secret_access_key": PropertyUtil.get_first_property_value(properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
-        "aws_session_token": PropertyUtil.get_first_property_value(properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
-        "region_name": PropertyUtil.get_first_property_value(properties, S3_REGION, AWS_REGION),
+        "aws_access_key_id": get_first_property_value(properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+        "aws_secret_access_key": get_first_property_value(properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+        "aws_session_token": get_first_property_value(properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+        "region_name": get_first_property_value(properties, S3_REGION, AWS_REGION),
     }
     config_kwargs = {}
     register_events: Dict[str, Callable[[Properties], None]] = {}
@@ -165,11 +164,11 @@ def _gs(properties: Properties) -> AbstractFileSystem:
         token=properties.get(GCS_TOKEN),
         consistency=properties.get(GCS_CONSISTENCY, "none"),
         cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
-        requester_pays=properties.get(GCS_REQUESTER_PAYS, False),
+        requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
         session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
         endpoint_url=properties.get(GCS_ENDPOINT),
         default_location=properties.get(GCS_DEFAULT_LOCATION),
-        version_aware=properties.get(GCS_VERSION_AWARE, "false").lower() == "true",
+        version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
     )
 
 
```
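
The `requester_pays` fix deserves a note: the old code handed the raw property value straight to the filesystem, and any non-empty string, including "false", is truthy in Python; `property_as_bool` now parses it to an actual boolean first.

```python
bool("false")  # True: a non-empty string is truthy, so the unparsed
               # properties.get(...) value would read "false" as enabled
```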

pyiceberg/io/pyarrow.py
Lines changed: 14 additions & 15 deletions

```diff
@@ -158,6 +158,7 @@
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
 from pyiceberg.utils.deprecated import deprecated
+from pyiceberg.utils.properties import get_first_property_value, property_as_int
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
 
@@ -345,14 +346,12 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
         if scheme in {"s3", "s3a", "s3n"}:
             from pyarrow.fs import S3FileSystem
 
-            from pyiceberg.table import PropertyUtil
-
             client_kwargs: Dict[str, Any] = {
                 "endpoint_override": self.properties.get(S3_ENDPOINT),
-                "access_key": PropertyUtil.get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
-                "secret_key": PropertyUtil.get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
-                "session_token": PropertyUtil.get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
-                "region": PropertyUtil.get_first_property_value(self.properties, S3_REGION, AWS_REGION),
+                "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+                "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+                "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+                "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION),
             }
 
             if proxy_uri := self.properties.get(S3_PROXY_URI):
@@ -2132,10 +2131,10 @@ def data_file_statistics_from_parquet_metadata(
 
 
 def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
-    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, PropertyUtil, TableProperties
+    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, TableProperties
 
     parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
-    row_group_size = PropertyUtil.property_as_int(
+    row_group_size = property_as_int(
         properties=table_metadata.properties,
         property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
         default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT,
@@ -2278,7 +2277,7 @@ def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_
 
 
 def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
-    from pyiceberg.table import PropertyUtil, TableProperties
+    from pyiceberg.table import TableProperties
 
     for key_pattern in [
         TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
@@ -2290,7 +2289,7 @@ def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
         raise NotImplementedError(f"Parquet writer option(s) {unsupported_keys} not implemented")
 
     compression_codec = table_properties.get(TableProperties.PARQUET_COMPRESSION, TableProperties.PARQUET_COMPRESSION_DEFAULT)
-    compression_level = PropertyUtil.property_as_int(
+    compression_level = property_as_int(
         properties=table_properties,
         property_name=TableProperties.PARQUET_COMPRESSION_LEVEL,
         default=TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT,
@@ -2301,17 +2300,17 @@ def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
     return {
         "compression": compression_codec,
         "compression_level": compression_level,
-        "data_page_size": PropertyUtil.property_as_int(
+        "data_page_size": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_PAGE_SIZE_BYTES,
            default=TableProperties.PARQUET_PAGE_SIZE_BYTES_DEFAULT,
        ),
-        "dictionary_pagesize_limit": PropertyUtil.property_as_int(
+        "dictionary_pagesize_limit": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_DICT_SIZE_BYTES,
            default=TableProperties.PARQUET_DICT_SIZE_BYTES_DEFAULT,
        ),
-        "write_batch_size": PropertyUtil.property_as_int(
+        "write_batch_size": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_PAGE_ROW_LIMIT,
            default=TableProperties.PARQUET_PAGE_ROW_LIMIT_DEFAULT,
@@ -2331,11 +2330,11 @@ def _dataframe_to_data_files(
     Returns:
         An iterable that supplies datafiles that represent the table.
     """
-    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, PropertyUtil, TableProperties, WriteTask
+    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, TableProperties, WriteTask
 
     counter = counter or itertools.count(0)
     write_uuid = write_uuid or uuid.uuid4()
-    target_file_size: int = PropertyUtil.property_as_int(  # type: ignore # The property is set with non-None value.
+    target_file_size: int = property_as_int(  # type: ignore # The property is set with non-None value.
         properties=table_metadata.properties,
         property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
         default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
```
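
`property_as_int` rounds out the family, matching the keyword-style call sites above; a sketch (signature assumed, analogous to `property_as_float`):

```python
from typing import Optional

from pyiceberg.typedef import Properties


def property_as_int(properties: Properties, property_name: str, default: Optional[int] = None) -> Optional[int]:
    """Parse an integer property, falling back to a default when unset."""
    if value := properties.get(property_name):
        return int(value)
    return default
```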
