Skip to content

Commit ceb020f

Browse files
Fokko, ndrluis, and kevinjqliu
authored and committed
Rename gcs.endpoint to gcs.service.host (apache#1007)
* Rename `gcs.endpoint` to `gcs.service.host`
  To make it in line with Java: https://github.com/apache/iceberg/blob/6ee6d1327d3811dbd5795c4e87efdc41b7a58eaa/gcp/src/main/java/org/apache/iceberg/gcp/GCPProperties.java#L32
* Import
  Co-authored-by: Andre Luis Anastacio <[email protected]>
* Use `deprecation_message` instead
  Co-authored-by: Andre Luis Anastacio <[email protected]>
* Use `deprecation_message` instead
  Co-authored-by: Andre Luis Anastacio <[email protected]>
* Fix message
* Update pyiceberg/io/fsspec.py
  Co-authored-by: Kevin Liu <[email protected]>
* Update pyiceberg/io/fsspec.py
  Co-authored-by: Kevin Liu <[email protected]>
* Update pyiceberg/io/pyarrow.py
  Co-authored-by: Kevin Liu <[email protected]>
---------
Co-authored-by: Andre Luis Anastacio <[email protected]>
Co-authored-by: Kevin Liu <[email protected]>
1 parent 15f6625 commit ceb020f

File tree

5 files changed

+22
-7
lines changed

5 files changed

+22
-7
lines changed

mkdocs/docs/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ For the FileIO there are several configuration options available:
161161
| gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache |
162162
| gcs.requester-pays | False | Configure whether to use requester-pays requests |
163163
| gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. |
164-
| gcs.endpoint | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
164+
| gcs.service.host | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port). If not given, defaults to the value of the environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, the standard Google endpoint is used. |
165165
| gcs.default-bucket-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. |
166166
| gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. |
167167

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
GCS_REQUESTER_PAYS = "gcs.requester-pays"
9595
GCS_SESSION_KWARGS = "gcs.session-kwargs"
9696
GCS_ENDPOINT = "gcs.endpoint"
97+
GCS_SERVICE_HOST = "gcs.service.host"
9798
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
9899
GCS_VERSION_AWARE = "gcs.version-aware"
99100
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"

pyiceberg/io/fsspec.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
GCS_ENDPOINT,
6565
GCS_PROJECT_ID,
6666
GCS_REQUESTER_PAYS,
67+
GCS_SERVICE_HOST,
6768
GCS_SESSION_KWARGS,
6869
GCS_TOKEN,
6970
GCS_VERSION_AWARE,
@@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem:
171172
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
172173
from gcsfs import GCSFileSystem
173174

175+
if properties.get(GCS_ENDPOINT):
176+
deprecation_message(
177+
deprecated_in="0.8.0",
178+
removed_in="0.9.0",
179+
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
180+
)
174181
return GCSFileSystem(
175182
project=properties.get(GCS_PROJECT_ID),
176183
access=properties.get(GCS_ACCESS, "full_control"),
@@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
179186
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
180187
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
181188
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
182-
endpoint_url=properties.get(GCS_ENDPOINT),
189+
endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT),
183190
default_location=properties.get(GCS_DEFAULT_LOCATION),
184191
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
185192
)

pyiceberg/io/pyarrow.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
AWS_SESSION_TOKEN,
9292
GCS_DEFAULT_LOCATION,
9393
GCS_ENDPOINT,
94+
GCS_SERVICE_HOST,
9495
GCS_TOKEN,
9596
GCS_TOKEN_EXPIRES_AT_MS,
9697
HDFS_HOST,
@@ -163,7 +164,7 @@
163164
from pyiceberg.utils.concurrent import ExecutorFactory
164165
from pyiceberg.utils.config import Config
165166
from pyiceberg.utils.datetime import millis_to_datetime
166-
from pyiceberg.utils.deprecated import deprecated
167+
from pyiceberg.utils.deprecated import deprecated, deprecation_message
167168
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
168169
from pyiceberg.utils.singleton import Singleton
169170
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -400,7 +401,13 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
400401
gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
401402
if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
402403
gcs_kwargs["default_bucket_location"] = bucket_location
403-
if endpoint := self.properties.get(GCS_ENDPOINT):
404+
if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT):
405+
if self.properties.get(GCS_ENDPOINT):
406+
deprecation_message(
407+
deprecated_in="0.8.0",
408+
removed_in="0.9.0",
409+
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
410+
)
404411
url_parts = urlparse(endpoint)
405412
gcs_kwargs["scheme"] = url_parts.scheme
406413
gcs_kwargs["endpoint_override"] = url_parts.netloc

tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@
5353
from pyiceberg.catalog.noop import NoopCatalog
5454
from pyiceberg.expressions import BoundReference
5555
from pyiceberg.io import (
56-
GCS_ENDPOINT,
5756
GCS_PROJECT_ID,
57+
GCS_SERVICE_HOST,
5858
GCS_TOKEN,
5959
GCS_TOKEN_EXPIRES_AT_MS,
6060
fsspec,
@@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO:
18731873
@pytest.fixture
18741874
def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
18751875
properties = {
1876-
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
1876+
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
18771877
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
18781878
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
18791879
}
@@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
18851885
from pyiceberg.io.pyarrow import PyArrowFileIO
18861886

18871887
properties = {
1888-
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
1888+
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
18891889
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
18901890
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
18911891
GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,

0 commit comments

Comments
 (0)