Skip to content

Commit 5d370b5

Browse files
committed
1 parent ba85dd1 commit 5d370b5

File tree

5 files changed

+25
-6
lines changed

5 files changed

+25
-6
lines changed

mkdocs/docs/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ For the FileIO there are several configuration options available:
131131
| gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache |
132132
| gcs.requester-pays | False | Configure whether to use requester-pays requests |
133133
| gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. |
134-
| gcs.endpoint | http://0.0.0.0:4443 | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
134+
| gcs.service.host | http://0.0.0.0:4443 | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port). If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
135135
| gcs.default-bucket-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. |
136136
| gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. |
137137

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
GCS_REQUESTER_PAYS = "gcs.requester-pays"
7979
GCS_SESSION_KWARGS = "gcs.session-kwargs"
8080
GCS_ENDPOINT = "gcs.endpoint"
81+
GCS_SERVICE_HOST = "gcs.service.host"
8182
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
8283
GCS_VERSION_AWARE = "gcs.version-aware"
8384

pyiceberg/io/fsspec.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
GCS_ENDPOINT,
5858
GCS_PROJECT_ID,
5959
GCS_REQUESTER_PAYS,
60+
GCS_SERVICE_HOST,
6061
GCS_SESSION_KWARGS,
6162
GCS_TOKEN,
6263
GCS_VERSION_AWARE,
@@ -76,6 +77,7 @@
7677
OutputStream,
7778
)
7879
from pyiceberg.typedef import Properties
80+
from pyiceberg.utils.deprecated import deprecated
7981
from pyiceberg.utils.properties import get_first_property_value, property_as_bool
8082

8183
logger = logging.getLogger(__name__)
@@ -158,6 +160,14 @@ def _gs(properties: Properties) -> AbstractFileSystem:
158160
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
159161
from gcsfs import GCSFileSystem
160162

163+
if (endpoint := properties.get(GCS_ENDPOINT)) and GCS_SERVICE_HOST not in properties:
164+
deprecated(
165+
deprecated_in="0.8.0",
166+
removed_in="0.9.0",
167+
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
168+
)(lambda: None)()
169+
properties[GCS_SERVICE_HOST] = endpoint
170+
161171
return GCSFileSystem(
162172
project=properties.get(GCS_PROJECT_ID),
163173
access=properties.get(GCS_ACCESS, "full_control"),
@@ -166,7 +176,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
166176
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
167177
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
168178
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
169-
endpoint_url=properties.get(GCS_ENDPOINT),
179+
endpoint_url=properties.get(GCS_SERVICE_HOST),
170180
default_location=properties.get(GCS_DEFAULT_LOCATION),
171181
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
172182
)

pyiceberg/io/pyarrow.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
AWS_SESSION_TOKEN,
9090
GCS_DEFAULT_LOCATION,
9191
GCS_ENDPOINT,
92+
GCS_SERVICE_HOST,
9293
GCS_TOKEN,
9394
GCS_TOKEN_EXPIRES_AT_MS,
9495
HDFS_HOST,
@@ -388,7 +389,14 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
388389
gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
389390
if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
390391
gcs_kwargs["default_bucket_location"] = bucket_location
391-
if endpoint := self.properties.get(GCS_ENDPOINT):
392+
if (endpoint := self.properties.get(GCS_ENDPOINT)) and GCS_SERVICE_HOST not in self.properties:
393+
deprecated(
394+
deprecated_in="0.8.0",
395+
removed_in="0.9.0",
396+
help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
397+
)(lambda: None)()
398+
self.properties[GCS_SERVICE_HOST] = endpoint
399+
if endpoint := self.properties.get(GCS_SERVICE_HOST):
392400
url_parts = urlparse(endpoint)
393401
gcs_kwargs["scheme"] = url_parts.scheme
394402
gcs_kwargs["endpoint_override"] = url_parts.netloc

tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@
5353
from pyiceberg.catalog.noop import NoopCatalog
5454
from pyiceberg.expressions import BoundReference
5555
from pyiceberg.io import (
56-
GCS_ENDPOINT,
5756
GCS_PROJECT_ID,
57+
GCS_SERVICE_HOST,
5858
GCS_TOKEN,
5959
GCS_TOKEN_EXPIRES_AT_MS,
6060
fsspec,
@@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO:
18731873
@pytest.fixture
18741874
def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
18751875
properties = {
1876-
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
1876+
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
18771877
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
18781878
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
18791879
}
@@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
18851885
from pyiceberg.io.pyarrow import PyArrowFileIO
18861886

18871887
properties = {
1888-
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
1888+
GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
18891889
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
18901890
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
18911891
GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,

0 commit comments

Comments
 (0)