Skip to content

Commit 8dcdb4b

Browse files
committed
feat: allow passing http client options
1 parent b71b7f0 commit 8dcdb4b

File tree

6 files changed

+408
-11
lines changed

6 files changed

+408
-11
lines changed

examples/object_store.ipynb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,39 @@
139139
"dataset_part = ds.dataset(\"/partitioned\", format=\"parquet\", filesystem=store, partitioning=partitioning)\n",
140140
"dataset_part.schema\n"
141141
]
142+
},
143+
{
144+
"cell_type": "code",
145+
"execution_count": null,
146+
"metadata": {},
147+
"outputs": [],
148+
"source": [
149+
"from object_store import ObjectStore\n",
150+
"\n",
151+
"store = ObjectStore(\"az://delta-rs\", options={\"account_name\": \"mlfusiondev\", \"use_azure_cli\": \"true\"})\n",
152+
"\n",
153+
"store.list()"
154+
]
155+
},
156+
{
157+
"cell_type": "code",
158+
"execution_count": null,
159+
"metadata": {},
160+
"outputs": [],
161+
"source": [
162+
"import pyarrow.fs as pa_fs\n",
163+
"from object_store.arrow import ArrowFileSystemHandler\n",
164+
"from object_store import ClientOptions\n",
165+
"import os\n",
166+
"\n",
167+
"storage_options = {\n",
168+
" \"account_name\": os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"],\n",
169+
" \"account_key\": os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"],\n",
170+
"}\n",
171+
"\n",
172+
"filesystem = pa_fs.PyFileSystem(ArrowFileSystemHandler(\"adl://simple\", storage_options, ClientOptions()))\n",
173+
"filesystem.get_file_info([\"part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet\"])"
174+
]
142175
}
143176
],
144177
"metadata": {

object-store/object_store/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# NOTE aliasing the imports with 'as' makes them public in the eyes
55
# of static code checkers. Thus we avoid listing them with __all__ = ...
6+
from ._internal import ClientOptions as ClientOptions
67
from ._internal import ListResult as ListResult
78
from ._internal import ObjectMeta as ObjectMeta
89
from ._internal import ObjectStore as _ObjectStore

object-store/object_store/_internal.pyi

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,103 @@ class ListResult:
3232
def objects(self) -> list[ObjectMeta]:
3333
"""Object metadata for the listing"""
3434

35+
class ClientOptions:
36+
"""HTTP client configuration for remote object stores"""
37+
38+
@property
39+
def user_agent(self) -> str | None:
40+
"""Sets the User-Agent header to be used by this client
41+
42+
Default is based on the version of this crate
43+
"""
44+
@property
45+
def default_content_type(self) -> str | None:
46+
"""Set the default CONTENT_TYPE for uploads"""
47+
@property
48+
def proxy_url(self) -> str | None:
49+
"""Set an HTTP proxy to use for requests"""
50+
@property
51+
def allow_http(self) -> bool:
52+
"""Sets what protocol is allowed.
53+
54+
If `allow_http` is:
55+
* false (default): Only HTTPS is allowed
56+
* true: HTTP and HTTPS are allowed
57+
"""
58+
@property
59+
def allow_insecure(self) -> bool:
60+
"""Allows connections to servers presenting invalid SSL certificates
61+
* false (default): Only valid HTTPS certificates are allowed
62+
* true: All HTTPS certificates are allowed
63+
64+
# Warning
65+
66+
You should think very carefully before enabling this option. If
67+
invalid certificates are trusted, *any* certificate for *any* site
68+
will be trusted for use. This includes expired certificates. This
69+
introduces significant vulnerabilities, and should only be used
70+
as a last resort or for testing.
71+
"""
72+
@property
73+
def timeout(self) -> int:
74+
"""Set a request timeout (seconds)
75+
76+
The timeout is applied from when the request starts connecting until the
77+
response body has finished
78+
"""
79+
@property
80+
def connect_timeout(self) -> int:
81+
"""Set a timeout (seconds) for only the connect phase of a Client"""
82+
@property
83+
def pool_idle_timeout(self) -> int:
84+
"""Set the pool max idle timeout (seconds)
85+
86+
This is the length of time an idle connection will be kept alive
87+
88+
Default is 90 seconds
89+
"""
90+
@property
91+
def pool_max_idle_per_host(self) -> int:
92+
"""Set the maximum number of idle connections per host
93+
94+
Default is no limit"""
95+
@property
96+
def http2_keep_alive_interval(self) -> int:
97+
"""Sets the interval at which HTTP2 Ping frames should be sent to keep a connection alive.
98+
99+
Default is disabled
100+
"""
101+
@property
102+
def http2_keep_alive_timeout(self) -> int:
103+
"""Sets a timeout for receiving an acknowledgement of the keep-alive ping.
104+
105+
If the ping is not acknowledged within the timeout, the connection will be closed.
106+
Does nothing if http2_keep_alive_interval is disabled.
107+
108+
Default is disabled
109+
"""
110+
@property
111+
def http2_keep_alive_while_idle(self) -> bool:
112+
"""Enable HTTP2 keep alive pings for idle connections
113+
114+
If disabled, keep-alive pings are only sent while there are open request/response
115+
streams. If enabled, pings are also sent when no streams are active
116+
117+
Default is disabled
118+
"""
119+
@property
120+
def http1_only(self) -> bool:
121+
"""Only use http1 connections"""
122+
@property
123+
def http2_only(self) -> bool:
124+
"""Only use http2 connections"""
125+
35126
class ObjectStore:
36127
"""A uniform API for interacting with object storage services and local files."""
37128

38-
def __init__(self, root: str, options: dict[str, str] | None = None) -> None: ...
129+
def __init__(
130+
self, root: str, options: dict[str, str] | None = None, client_options: ClientOptions | None = None
131+
) -> None: ...
39132
def get(self, location: Path) -> bytes:
40133
"""Return the bytes that are stored at the specified location."""
41134
def get_range(self, location: Path, start: int, length: int) -> bytes:
@@ -115,7 +208,9 @@ class ObjectOutputStream:
115208
class ArrowFileSystemHandler:
116209
"""Implementation of pyarrow.fs.FileSystemHandler for use with pyarrow.fs.PyFileSystem"""
117210

118-
def __init__(self, root: str, options: dict[str, str] | None = None) -> None: ...
211+
def __init__(
212+
self, root: str, options: dict[str, str] | None = None, client_options: ClientOptions | None = None
213+
) -> None: ...
119214
def copy_file(self, src: str, dst: str) -> None:
120215
"""Copy a file.
121216

object-store/src/file.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::sync::Arc;
33

44
use crate::builder::ObjectStoreBuilder;
55
use crate::utils::{delete_dir, walk_tree};
6-
use crate::ObjectStoreError;
6+
use crate::{ObjectStoreError, PyClientOptions};
77

88
use object_store::path::Path;
99
use object_store::{DynObjectStore, Error as InnerObjectStoreError, ListResult, MultipartId};
@@ -25,11 +25,18 @@ pub struct ArrowFileSystemHandler {
2525
#[pymethods]
2626
impl ArrowFileSystemHandler {
2727
#[new]
28-
#[pyo3(signature = (root, options = None))]
29-
fn new(root: String, options: Option<HashMap<String, String>>) -> PyResult<Self> {
28+
#[pyo3(signature = (root, options = None, client_options = None))]
29+
fn new(
30+
root: String,
31+
options: Option<HashMap<String, String>>,
32+
client_options: Option<PyClientOptions>,
33+
) -> PyResult<Self> {
34+
let client_options = client_options.unwrap_or_default();
3035
let inner = ObjectStoreBuilder::new(root.clone())
3136
.with_path_as_prefix(true)
3237
.with_options(options.clone().unwrap_or_default())
38+
.with_client_options(client_options.client_options()?)
39+
.with_retry_config(client_options.retry_config()?)
3340
.build()
3441
.map_err(ObjectStoreError::from)?;
3542
Ok(Self {

0 commit comments

Comments
 (0)