-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add client credential oauth integration support + related databricks helpers to SDK #348
Changes from 9 commits
67c7af3
67edfbb
08796e2
e654b0b
dd02020
36634c7
d7ba5e6
e146ddd
d58a743
b229b4b
3ec143d
89497bd
3f42531
fb80f1d
73569a3
91877ee
c38fecd
4a1a512
54a47c9
144e895
2042938
59862c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,22 +2,27 @@ | |
from typing import Callable, Dict, Optional | ||
|
||
from ..client import Client | ||
from ..oauth import Credentials | ||
from .external import is_local | ||
|
||
""" | ||
NOTE: These APIs are provided as a convenience and are subject to breaking changes: | ||
https://github.com/databricks/databricks-sdk-py#interface-stability | ||
""" | ||
|
||
POSIT_OAUTH_INTEGRATION_AUTH_TYPE = "posit-oauth-integration" | ||
|
||
# The Databricks SDK CredentialsProvider == Databricks SQL HeaderFactory | ||
CredentialsProvider = Callable[[], Dict[str, str]] | ||
|
||
|
||
class CredentialsStrategy(abc.ABC): | ||
"""Maintain compatibility with the Databricks SQL/SDK client libraries. | ||
|
||
https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/auth/authenticators.py#L19-L33 | ||
https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/credentials_provider.py#L44-L54 | ||
See Also | ||
-------- | ||
* https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/auth/authenticators.py#L19-L33 | ||
* https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/credentials_provider.py#L44-L54 | ||
""" | ||
|
||
@abc.abstractmethod | ||
|
@@ -29,29 +34,122 @@ def __call__(self, *args, **kwargs) -> CredentialsProvider: | |
raise NotImplementedError | ||
|
||
|
||
def _new_bearer_authorization_header(credentials: Credentials) -> Dict[str, str]: | ||
"""Helper to transform an Credentials object into the Bearer auth header consumed by databricks. | ||
|
||
Raises | ||
------ | ||
ValueError: If provided Credentials object does not contain an access token | ||
|
||
Returns | ||
------- | ||
Dict[str, str] | ||
""" | ||
access_token = credentials.get("access_token") | ||
if access_token is None: | ||
raise ValueError("Missing value for field 'access_token' in credentials.") | ||
return {"Authorization": f"Bearer {access_token}"} | ||
|
||
|
||
def _get_auth_type(local_auth_type: str) -> str: | ||
"""Returns the auth type currently in use. | ||
|
||
The databricks-sdk client uses the configured auth_type to create | ||
a user-agent string which is used for attribution. We should only | ||
overwrite the auth_type if we are using the PositCredentialsStrategy (non-local), | ||
otherwise, we should return the auth_type of the configured local_strategy instead | ||
to avoid breaking someone elses attribution. | ||
|
||
NOTE: The databricks-sql client does not use auth_type to set the user-agent. | ||
https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/client.py#L214-L219 | ||
|
||
See Also | ||
-------- | ||
* https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/config.py#L261-L269 | ||
|
||
Returns | ||
------- | ||
str | ||
""" | ||
if is_local(): | ||
return local_auth_type | ||
|
||
return POSIT_OAUTH_INTEGRATION_AUTH_TYPE | ||
|
||
|
||
class PositContentCredentialsProvider: | ||
"""CredentialsProvider implementation which initiates a credential exchange using a content-session-token.""" | ||
|
||
def __init__(self, client: Client): | ||
self._client = client | ||
|
||
def __call__(self) -> Dict[str, str]: | ||
credentials = self._client.oauth.get_content_credentials() | ||
return _new_bearer_authorization_header(credentials) | ||
|
||
|
||
class PositCredentialsProvider: | ||
"""CredentialsProvider implementation which initiates a credential exchange using a user-session-token.""" | ||
|
||
def __init__(self, client: Client, user_session_token: str): | ||
self._client = client | ||
self._user_session_token = user_session_token | ||
|
||
def __call__(self) -> Dict[str, str]: | ||
credentials = self._client.oauth.get_credentials(self._user_session_token) | ||
access_token = credentials.get("access_token") | ||
if access_token is None: | ||
raise ValueError("Missing value for field 'access_token' in credentials.") | ||
return {"Authorization": f"Bearer {access_token}"} | ||
return _new_bearer_authorization_header(credentials) | ||
|
||
|
||
class PositCredentialsStrategy(CredentialsStrategy): | ||
class PositContentCredentialsStrategy(CredentialsStrategy): | ||
"""`CredentialsStrategy` implementation which supports interacting with Service Account OAuth integrations on Connect. | ||
|
||
This strategy callable class returns a `PositContentCredentialsProvider` when hosted on Connect, and | ||
its `local_strategy` strategy otherwise. | ||
|
||
Examples | ||
-------- | ||
```python | ||
from posit.connect.external.databricks import PositContentCredentialsStrategy | ||
|
||
import pandas as pd | ||
import requests | ||
|
||
from databricks import sql | ||
from databricks.sdk.core import ApiClient, Config, databricks_cli | ||
from databricks.sdk.service.iam import CurrentUserAPI | ||
|
||
# env vars | ||
DATABRICKS_HOST = "<REDACTED>" | ||
DATABRICKS_HOST_URL = f"https://{DATABRICKS_HOST}" | ||
SQL_HTTP_PATH = "<REDACTED>" | ||
|
||
posit_strategy = PositContentCredentialsStrategy(local_strategy=databricks_cli) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The other credentialsStrategy docstring uses a sample py-shiny app so that the user can (hopefully) see that the location that the initialization happens is really important. That context is lost in this example. I think it would be worthwhile to show both and call out explicitly why the initialization is different for service accounts than for viewer auth There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I guess the question ultimately is how much of this documentation should exist in the docstrings vs in our docs pages (both the cookbook examples themselves, and prose discussions explaining things). I think this might be a question for @schloerke - I don't know exactly what expectations would be for SDK consumers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think docs strings should show usage examples. I think the current Connect recipes are turning into small usage examples and should be moved to the doc strings. (And the recipes should be larger/longer goal-oriented stories combining many small usage examples.) tl/dr; More usage examples in doc strings, please! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This example needs to be updated to use the |
||
|
||
cfg = Config(host=DATABRICKS_HOST_URL, credentials_strategy=posit_strategy) | ||
|
||
databricks_user_info = CurrentUserAPI(ApiClient(cfg)).me() | ||
print(f"Hello, {databricks_user_info.display_name}!") | ||
|
||
query = "SELECT * FROM samples.nyctaxi.trips LIMIT 10;" | ||
with sql.connect( | ||
server_hostname=DATABRICKS_HOST, | ||
http_path=SQL_HTTP_PATH, | ||
credentials_provider=posit_strategy.sql_credentials_provider(cfg), | ||
) as connection: | ||
with connection.cursor() as cursor: | ||
cursor.execute(query) | ||
rows = cursor.fetchall() | ||
print(pd.DataFrame([row.asDict() for row in rows])) | ||
``` | ||
""" | ||
|
||
def __init__( | ||
self, | ||
local_strategy: CredentialsStrategy, | ||
client: Optional[Client] = None, | ||
user_session_token: Optional[str] = None, | ||
): | ||
self._local_strategy = local_strategy | ||
self._client = client | ||
self._user_session_token = user_session_token | ||
|
||
def sql_credentials_provider(self, *args, **kwargs): | ||
"""The sql connector attempts to call the credentials provider w/o any args. | ||
|
@@ -66,23 +164,103 @@ def sql_credentials_provider(self, *args, **kwargs): | |
return lambda: self.__call__(*args, **kwargs) | ||
|
||
def auth_type(self) -> str: | ||
"""Returns the auth type currently in use. | ||
return _get_auth_type(self._local_strategy.auth_type()) | ||
|
||
def __call__(self, *args, **kwargs) -> CredentialsProvider: | ||
# If the content is not running on Connect then fall back to local_strategy | ||
if is_local(): | ||
return self._local_strategy(*args, **kwargs) | ||
|
||
The databricks-sdk client uses the configurated auth_type to create | ||
a user-agent string which is used for attribution. We should only | ||
overwrite the auth_type if we are using the PositCredentialsStrategy (non-local), | ||
otherwise, we should return the auth_type of the configured local_strategy instead | ||
to avoid breaking someone elses attribution. | ||
if self._client is None: | ||
self._client = Client() | ||
|
||
return PositContentCredentialsProvider(self._client) | ||
|
||
|
||
class PositCredentialsStrategy(CredentialsStrategy): | ||
"""`CredentialsStrategy` implementation which supports interacting with Viewer OAuth integrations on Connect. | ||
|
||
This strategy callable class returns a `PositCredentialsProvider` when hosted on Connect, and | ||
its `local_strategy` strategy otherwise. | ||
|
||
Examples | ||
-------- | ||
```python | ||
import os | ||
|
||
import pandas as pd | ||
from databricks import sql | ||
from databricks.sdk.core import ApiClient, Config, databricks_cli | ||
from databricks.sdk.service.iam import CurrentUserAPI | ||
from posit.connect.external.databricks import PositCredentialsStrategy | ||
from shiny import App, Inputs, Outputs, Session, render, ui | ||
|
||
# env vars | ||
DATABRICKS_HOST = "<REDACTED>" | ||
DATABRICKS_HOST_URL = f"https://{DATABRICKS_HOST}" | ||
SQL_HTTP_PATH = "<REDACTED>" | ||
|
||
app_ui = ui.page_fluid(ui.output_text("text"), ui.output_data_frame("result")) | ||
|
||
|
||
def server(i: Inputs, o: Outputs, session: Session): | ||
session_token = session.http_conn.headers.get("Posit-Connect-User-Session-Token") | ||
posit_strategy = PositCredentialsStrategy( | ||
local_strategy=databricks_cli, user_session_token=session_token | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this now be the patched local strategy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. its sort of ambiguous imo. the user could still use the databricks_cli as the local strategy, if say for example their databricks admin doesn't want to hand out service principal credentials. They can still use their own identity for local development with the databricks cli - that was my thinking for keeping this as-is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some comments to the docstring to clarify the various use cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah good point! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, I myself got confused - to clarify, this strategy is supporting viewer auth, which has no issues. The local strategy here works fine. Its just the content strategies that need the patch. (I still added docstring updates in the right places) |
||
) | ||
cfg = Config(host=DATABRICKS_HOST_URL, credentials_strategy=posit_strategy) | ||
|
||
@render.data_frame | ||
def result(): | ||
query = "SELECT * FROM samples.nyctaxi.trips LIMIT 10;" | ||
|
||
with sql.connect( | ||
server_hostname=DATABRICKS_HOST, | ||
http_path=SQL_HTTP_PATH, | ||
credentials_provider=posit_strategy.sql_credentials_provider(cfg), | ||
) as connection: | ||
with connection.cursor() as cursor: | ||
cursor.execute(query) | ||
rows = cursor.fetchall() | ||
df = pd.DataFrame(rows, columns=[col[0] for col in cursor.description]) | ||
return df | ||
|
||
@render.text | ||
def text(): | ||
databricks_user_info = CurrentUserAPI(ApiClient(cfg)).me() | ||
return f"Hello, {databricks_user_info.display_name}!" | ||
|
||
|
||
app = App(app_ui, server) | ||
``` | ||
""" | ||
|
||
def __init__( | ||
self, | ||
local_strategy: CredentialsStrategy, | ||
client: Optional[Client] = None, | ||
user_session_token: Optional[str] = None, | ||
): | ||
self._local_strategy = local_strategy | ||
self._client = client | ||
self._user_session_token = user_session_token | ||
|
||
def sql_credentials_provider(self, *args, **kwargs): | ||
"""The sql connector attempts to call the credentials provider w/o any args. | ||
|
||
https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/config.py#L261-L269 | ||
The SQL client's `ExternalAuthProvider` is not compatible w/ the SDK's implementation of | ||
`CredentialsProvider`, so create a no-arg lambda that wraps the args defined by the real caller. | ||
This way we can pass in a databricks `Config` object required by most of the SDK's `CredentialsProvider` | ||
implementations from where `sql.connect` is called. | ||
|
||
NOTE: The databricks-sql client does not use auth_type to set the user-agent. | ||
https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/client.py#L214-L219 | ||
See Also | ||
-------- | ||
* https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365 | ||
""" | ||
if is_local(): | ||
return self._local_strategy.auth_type() | ||
else: | ||
return "posit-oauth-integration" | ||
return lambda: self.__call__(*args, **kwargs) | ||
|
||
def auth_type(self) -> str: | ||
return _get_auth_type(self._local_strategy.auth_type()) | ||
|
||
def __call__(self, *args, **kwargs) -> CredentialsProvider: | ||
# If the content is not running on Connect then fall back to local_strategy | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .oauth import Credentials as Credentials | ||
from .oauth import OAuth as OAuth |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@zackverham did you ever figure out a workaround for getting local service accounts tokens via the cli fallback?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have not. Databricks labeled my ticket in the CLI repo as a bug, but that's where I'm stuck currently:
databricks/cli#1939
I'm not totally sure what we want to do with that. Options seem to be: