Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add client credential oauth integration support + related databricks helpers to SDK #348

Merged
merged 22 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
67c7af3
initial proposal for client credentials support in python sdk
zackverham Nov 27, 2024
67edfbb
a bit of refactoring + some more documentation
zackverham Nov 27, 2024
08796e2
test coverage, linting
zackverham Dec 2, 2024
e654b0b
Update src/posit/connect/external/databricks.py
zackverham Dec 3, 2024
dd02020
Update src/posit/connect/external/databricks.py
zackverham Dec 3, 2024
36634c7
responding to PR comments
zackverham Dec 3, 2024
d7ba5e6
fix comment typo
zackverham Dec 3, 2024
e146ddd
Wrap examples in markdown blocks
schloerke Dec 4, 2024
d58a743
Expose `external` on docs
schloerke Dec 4, 2024
b229b4b
updating docstrings in response to PR comments
zackverham Dec 4, 2024
3ec143d
Merge branch 'zack-client-creds' of github.com:posit-dev/posit-sdk-py…
zackverham Dec 4, 2024
89497bd
docstring polish - backticks
zackverham Dec 4, 2024
3f42531
fix linting issues
zackverham Dec 4, 2024
fb80f1d
Copy in existing example app and add title to each file
schloerke Dec 4, 2024
73569a3
add patched-in local strategy for client credential access tokens
zackverham Dec 5, 2024
91877ee
add implementing strategy that can use local credentials provider
zackverham Dec 5, 2024
c38fecd
missing params
zackverham Dec 5, 2024
4a1a512
commit to force develop dependency to update
zackverham Dec 5, 2024
54a47c9
add raise_for_status
zackverham Dec 5, 2024
144e895
adding docstrings, tests
zackverham Dec 5, 2024
2042938
sweep through docstrings in response to PR comments
zackverham Dec 6, 2024
59862c4
linting
zackverham Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,7 @@ quartodoc:
contents:
- connect.metrics
- connect.metrics.usage
- title: External Integrations
contents:
- connect.external.databricks
- connect.external.snowflake
4 changes: 2 additions & 2 deletions src/posit/connect/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,8 +755,8 @@ def find_by(
-------
Optional[ContentItem]

Example
-------
Examples
--------
>>> find_by(name="example-content-name")
"""
attr_items = attrs.items()
Expand Down
222 changes: 200 additions & 22 deletions src/posit/connect/external/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,27 @@
from typing import Callable, Dict, Optional

from ..client import Client
from ..oauth import Credentials
from .external import is_local

"""
NOTE: These APIs are provided as a convenience and are subject to breaking changes:
https://github.com/databricks/databricks-sdk-py#interface-stability
"""

POSIT_OAUTH_INTEGRATION_AUTH_TYPE = "posit-oauth-integration"

# The Databricks SDK CredentialsProvider == Databricks SQL HeaderFactory
CredentialsProvider = Callable[[], Dict[str, str]]


class CredentialsStrategy(abc.ABC):
"""Maintain compatibility with the Databricks SQL/SDK client libraries.

https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/auth/authenticators.py#L19-L33
https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/credentials_provider.py#L44-L54
See Also
--------
* https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/auth/authenticators.py#L19-L33
* https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/credentials_provider.py#L44-L54
"""

@abc.abstractmethod
Expand All @@ -29,29 +34,122 @@ def __call__(self, *args, **kwargs) -> CredentialsProvider:
raise NotImplementedError


def _new_bearer_authorization_header(credentials: Credentials) -> Dict[str, str]:
"""Helper to transform an Credentials object into the Bearer auth header consumed by databricks.

Raises
------
ValueError: If provided Credentials object does not contain an access token

Returns
-------
Dict[str, str]
"""
access_token = credentials.get("access_token")
if access_token is None:
raise ValueError("Missing value for field 'access_token' in credentials.")
return {"Authorization": f"Bearer {access_token}"}


def _get_auth_type(local_auth_type: str) -> str:
"""Returns the auth type currently in use.

The databricks-sdk client uses the configured auth_type to create
a user-agent string which is used for attribution. We should only
overwrite the auth_type if we are using the PositCredentialsStrategy (non-local),
otherwise, we should return the auth_type of the configured local_strategy instead
to avoid breaking someone elses attribution.

NOTE: The databricks-sql client does not use auth_type to set the user-agent.
https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/client.py#L214-L219

See Also
--------
* https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/config.py#L261-L269

Returns
-------
str
"""
if is_local():
return local_auth_type

return POSIT_OAUTH_INTEGRATION_AUTH_TYPE


class PositContentCredentialsProvider:
"""CredentialsProvider implementation which initiates a credential exchange using a content-session-token."""

def __init__(self, client: Client):
self._client = client

def __call__(self) -> Dict[str, str]:
credentials = self._client.oauth.get_content_credentials()
return _new_bearer_authorization_header(credentials)


class PositCredentialsProvider:
"""CredentialsProvider implementation which initiates a credential exchange using a user-session-token."""

def __init__(self, client: Client, user_session_token: str):
self._client = client
self._user_session_token = user_session_token

def __call__(self) -> Dict[str, str]:
credentials = self._client.oauth.get_credentials(self._user_session_token)
access_token = credentials.get("access_token")
if access_token is None:
raise ValueError("Missing value for field 'access_token' in credentials.")
return {"Authorization": f"Bearer {access_token}"}
return _new_bearer_authorization_header(credentials)


class PositCredentialsStrategy(CredentialsStrategy):
class PositContentCredentialsStrategy(CredentialsStrategy):
"""`CredentialsStrategy` implementation which supports interacting with Service Account OAuth integrations on Connect.

This strategy callable class returns a `PositContentCredentialsProvider` when hosted on Connect, and
its `local_strategy` strategy otherwise.

Examples
--------
```python
from posit.connect.external.databricks import PositContentCredentialsStrategy

import pandas as pd
import requests

from databricks import sql
from databricks.sdk.core import ApiClient, Config, databricks_cli
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zackverham did you ever figure out a workaround for getting local service accounts tokens via the cli fallback?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not. Databricks labeled my ticket in the CLI repo as a bug, but that's where I'm stuck currently:

databricks/cli#1939

I'm not totally sure what we want to do with that. Options seem to be:

  1. remove the local option from our implementation and re-introduce it when databricks supports it
  2. leave the option to provide a local strategy + code comments explaining the databricks cli doesn't support it. A particularly motivated user could probably fork the databricks cli and implement a token command that follows the pattern established in other cli operations to get it to work. This isn't super likely and obviously pretty rough, but the local strategy here doesn't explicitly state it has to be provided by an official vendor.

from databricks.sdk.service.iam import CurrentUserAPI

# env vars
DATABRICKS_HOST = "<REDACTED>"
DATABRICKS_HOST_URL = f"https://{DATABRICKS_HOST}"
SQL_HTTP_PATH = "<REDACTED>"

posit_strategy = PositContentCredentialsStrategy(local_strategy=databricks_cli)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other credentialsStrategy docstring uses a sample py-shiny app so that the user can (hopefully) see that the location that the initialization happens is really important. That context is lost in this example. I think it would be worthwhile to show both and call out explicitly why the initialization is different for service accounts than for viewer auth

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, I guess the question ultimately is how much of this documentation should exist in the docstrings vs in our docs pages (both the cookbook examples themselves, and prose discussions explaining things).

I think this might be a question for @schloerke - I don't know exactly what expectations would be for SDK consumers.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think docs strings should show usage examples.

I think the current Connect recipes are turning into small usage examples and should be moved to the doc strings. (And the recipes should be larger/longer goal-oriented stories combining many small usage examples.)


tl/dr; More usage examples in doc strings, please!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example needs to be updated to use the PositLocalContentCredentialsStrategy


cfg = Config(host=DATABRICKS_HOST_URL, credentials_strategy=posit_strategy)

databricks_user_info = CurrentUserAPI(ApiClient(cfg)).me()
print(f"Hello, {databricks_user_info.display_name}!")

query = "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"
with sql.connect(
server_hostname=DATABRICKS_HOST,
http_path=SQL_HTTP_PATH,
credentials_provider=posit_strategy.sql_credentials_provider(cfg),
) as connection:
with connection.cursor() as cursor:
cursor.execute(query)
rows = cursor.fetchall()
print(pd.DataFrame([row.asDict() for row in rows]))
```
"""

def __init__(
self,
local_strategy: CredentialsStrategy,
client: Optional[Client] = None,
user_session_token: Optional[str] = None,
):
self._local_strategy = local_strategy
self._client = client
self._user_session_token = user_session_token

def sql_credentials_provider(self, *args, **kwargs):
"""The sql connector attempts to call the credentials provider w/o any args.
Expand All @@ -66,23 +164,103 @@ def sql_credentials_provider(self, *args, **kwargs):
return lambda: self.__call__(*args, **kwargs)

def auth_type(self) -> str:
"""Returns the auth type currently in use.
return _get_auth_type(self._local_strategy.auth_type())

def __call__(self, *args, **kwargs) -> CredentialsProvider:
# If the content is not running on Connect then fall back to local_strategy
if is_local():
return self._local_strategy(*args, **kwargs)

The databricks-sdk client uses the configurated auth_type to create
a user-agent string which is used for attribution. We should only
overwrite the auth_type if we are using the PositCredentialsStrategy (non-local),
otherwise, we should return the auth_type of the configured local_strategy instead
to avoid breaking someone elses attribution.
if self._client is None:
self._client = Client()

return PositContentCredentialsProvider(self._client)


class PositCredentialsStrategy(CredentialsStrategy):
"""`CredentialsStrategy` implementation which supports interacting with Viewer OAuth integrations on Connect.

This strategy callable class returns a `PositCredentialsProvider` when hosted on Connect, and
its `local_strategy` strategy otherwise.

Examples
--------
```python
import os

import pandas as pd
from databricks import sql
from databricks.sdk.core import ApiClient, Config, databricks_cli
from databricks.sdk.service.iam import CurrentUserAPI
from posit.connect.external.databricks import PositCredentialsStrategy
from shiny import App, Inputs, Outputs, Session, render, ui

# env vars
DATABRICKS_HOST = "<REDACTED>"
DATABRICKS_HOST_URL = f"https://{DATABRICKS_HOST}"
SQL_HTTP_PATH = "<REDACTED>"

app_ui = ui.page_fluid(ui.output_text("text"), ui.output_data_frame("result"))


def server(i: Inputs, o: Outputs, session: Session):
session_token = session.http_conn.headers.get("Posit-Connect-User-Session-Token")
posit_strategy = PositCredentialsStrategy(
local_strategy=databricks_cli, user_session_token=session_token
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this now be the patched local strategy?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its sort of ambiguous imo. the user could still use the databricks_cli as the local strategy, if say for example their databricks admin doesn't want to hand out service principal credentials. They can still use their own identity for local development with the databricks cli - that was my thinking for keeping this as-is.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some comments to the docstring to clarify the various use cases.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah good point!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I myself got confused - to clarify, this strategy is supporting viewer auth, which has no issues. The local strategy here works fine. Its just the content strategies that need the patch. (I still added docstring updates in the right places)

)
cfg = Config(host=DATABRICKS_HOST_URL, credentials_strategy=posit_strategy)

@render.data_frame
def result():
query = "SELECT * FROM samples.nyctaxi.trips LIMIT 10;"

with sql.connect(
server_hostname=DATABRICKS_HOST,
http_path=SQL_HTTP_PATH,
credentials_provider=posit_strategy.sql_credentials_provider(cfg),
) as connection:
with connection.cursor() as cursor:
cursor.execute(query)
rows = cursor.fetchall()
df = pd.DataFrame(rows, columns=[col[0] for col in cursor.description])
return df

@render.text
def text():
databricks_user_info = CurrentUserAPI(ApiClient(cfg)).me()
return f"Hello, {databricks_user_info.display_name}!"


app = App(app_ui, server)
```
"""

def __init__(
self,
local_strategy: CredentialsStrategy,
client: Optional[Client] = None,
user_session_token: Optional[str] = None,
):
self._local_strategy = local_strategy
self._client = client
self._user_session_token = user_session_token

def sql_credentials_provider(self, *args, **kwargs):
"""The sql connector attempts to call the credentials provider w/o any args.

https://github.com/databricks/databricks-sdk-py/blob/v0.29.0/databricks/sdk/config.py#L261-L269
The SQL client's `ExternalAuthProvider` is not compatible w/ the SDK's implementation of
`CredentialsProvider`, so create a no-arg lambda that wraps the args defined by the real caller.
This way we can pass in a databricks `Config` object required by most of the SDK's `CredentialsProvider`
implementations from where `sql.connect` is called.

NOTE: The databricks-sql client does not use auth_type to set the user-agent.
https://github.com/databricks/databricks-sql-python/blob/v3.3.0/src/databricks/sql/client.py#L214-L219
See Also
--------
* https://github.com/databricks/databricks-sql-python/issues/148#issuecomment-2271561365
"""
if is_local():
return self._local_strategy.auth_type()
else:
return "posit-oauth-integration"
return lambda: self.__call__(*args, **kwargs)

def auth_type(self) -> str:
return _get_auth_type(self._local_strategy.auth_type())

def __call__(self, *args, **kwargs) -> CredentialsProvider:
# If the content is not running on Connect then fall back to local_strategy
Expand Down
1 change: 1 addition & 0 deletions src/posit/connect/oauth/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .oauth import Credentials as Credentials
from .oauth import OAuth as OAuth
44 changes: 39 additions & 5 deletions src/posit/connect/oauth/oauth.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
from typing import Optional

from typing_extensions import TypedDict
Expand All @@ -8,12 +9,36 @@
from .integrations import Integrations
from .sessions import Sessions

GRANT_TYPE = "urn:ietf:params:oauth:grant-type:token-exchange"
USER_SESSION_TOKEN_TYPE = "urn:posit:connect:user-session-token"
CONTENT_SESSION_TOKEN_TYPE = "urn:posit:connect:content-session-token"

def _get_content_session_token() -> str:
"""Return the content session token.

Reads the environment variable 'CONNECT_CONTENT_SESSION_TOKEN'.

Raises
------
ValueError: If CONNECT_CONTENT_SESSION_TOKEN is not set or invalid

Returns
-------
str
"""
value = os.environ.get("CONNECT_CONTENT_SESSION_TOKEN")
if not value:
raise ValueError("Invalid value for 'CONNECT_CONTENT_SESSION_TOKEN': Must be a non-empty string.")
return value

class OAuth(Resources):
def __init__(self, params: ResourceParameters, api_key: str) -> None:
super().__init__(params)
self.api_key = api_key

def _get_credentials_url(self) -> str:
return self.params.url + "v1/oauth/integrations/credentials"

@property
def integrations(self):
return Integrations(self.params)
Expand All @@ -23,18 +48,27 @@ def sessions(self):
return Sessions(self.params)

def get_credentials(self, user_session_token: Optional[str] = None) -> Credentials:
url = self.params.url + "v1/oauth/integrations/credentials"

"""Perform an oauth credential exchange with a user-session-token."""
# craft a credential exchange request
data = {}
data["grant_type"] = "urn:ietf:params:oauth:grant-type:token-exchange"
data["subject_token_type"] = "urn:posit:connect:user-session-token"
data["grant_type"] = GRANT_TYPE
data["subject_token_type"] = USER_SESSION_TOKEN_TYPE
if user_session_token:
data["subject_token"] = user_session_token

response = self.params.session.post(url, data=data)
response = self.params.session.post(self._get_credentials_url(), data=data)
return Credentials(**response.json())

def get_content_credentials(self, content_session_token: Optional[str] = None) -> Credentials:
"""Perform an oauth credential exchange with a content-session-token."""
# craft a credential exchange request
data = {}
data["grant_type"] = GRANT_TYPE
data["subject_token_type"] = CONTENT_SESSION_TOKEN_TYPE
data["subject_token"] = content_session_token or _get_content_session_token()

response = self.params.session.post(self._get_credentials_url(), data=data)
return Credentials(**response.json())

class Credentials(TypedDict, total=False):
access_token: str
Expand Down
Loading
Loading