Build the databricks_account
and databricks_workspace
providers for StackQL
using the Databricks web documentation.
The program requires Selenium
and ChromeDriver.
On Windows, use PowerShell to run the following commands. They extract the web documentation into machine-readable staging documents, which are then converted into tagged OpenAPI specification documents organized by service:
python -m venv venv
.\venv\Scripts\Activate
pip install -r requirements.txt
pip freeze
# scrape web docs
python .\process_web_docs.py account --clean --debug
python .\process_web_docs.py workspace --clean --debug
# generate openapi specs
python .\generate_openapi_specs.py account --clean --debug
python .\generate_openapi_specs.py workspace --clean --debug
deactivate
Remove-Item -Recurse -Force ./venv
To run the tests locally, clone stackql-provider-tests and run the following:
# run from the directory you cloned into
cd /mnt/c/LocalGitRepos/stackql/core/stackql-provider-tests
# test account
bash test-provider.sh \
databricks_account \
false \
/mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi/openapi_providers \
true
# test workspace
bash test-provider.sh \
databricks_workspace \
false \
/mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi/openapi_providers \
true
# back to starting dir
cd /mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi
curl -L https://bit.ly/stackql-zip -O && unzip stackql-zip
PROVIDER_REGISTRY_ROOT_DIR="$(pwd)/openapi_providers"
REG_STR='{"url": "file://'${PROVIDER_REGISTRY_ROOT_DIR}'", "localDocRoot": "'${PROVIDER_REGISTRY_ROOT_DIR}'", "verifyConfig": {"nopVerify": true}}'
./stackql shell --registry="${REG_STR}"
Some test queries to run in the shell:
SELECT
u.id,
displayName,
userName,
active
FROM databricks_account.iam.users u, JSON_EACH(roles)
WHERE account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce'
AND JSON_EXTRACT(json_each.value, '$.value') = 'account_admin';
select
gr.id,
displayName,
json_extract(json_each.value, '$.value') as entitlement
from databricks_workspace.iam.groups gr, JSON_EACH(entitlements)
where deployment_name = 'dbc-ddbc0f51-c9cf';
SELECT
sp.id,
active,
applicationId,
displayName,
externalId
FROM databricks_account.iam.service_principals sp, JSON_EACH(roles)
WHERE account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce'
AND JSON_EXTRACT(json_each.value, '$.value') = 'account_admin';
select
workspace_id,
workspace_name,
deployment_name,
workspace_status,
pricing_tier,
aws_region,
credentials_id,
storage_configuration_id
from
databricks_account.provisioning.workspaces where account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce';
select
cluster_id,
aws_attributes,
node_type_id,
state
from
databricks_workspace.compute.clusters
where deployment_name = 'dbc-ddbc0f51-c9cf';
select
*
from databricks_account.provisioning.vw_workspaces
where account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce'
python .\find_new_routes.py workspace
# or
python3 .\find_new_routes.py account
DATABRICKS_TOKEN=$(curl --request POST "https://accounts.cloud.databricks.com/oidc/accounts/${DATABRICKS_ACCOUNT_ID}/v1/token" \
--header "Content-Type: application/x-www-form-urlencoded" \
--data-urlencode "grant_type=client_credentials" \
--data-urlencode "client_id=${DATABRICKS_CLIENT_ID}" \
--data-urlencode "client_secret=${DATABRICKS_CLIENT_SECRET}" \
--data-urlencode "scope=all-apis" | jq -r .access_token)
curl --request GET "https://accounts.cloud.databricks.com/api/2.0/accounts/${DATABRICKS_ACCOUNT_ID}/workspaces" \
--header "Authorization: Bearer ${DATABRICKS_TOKEN}" \
-vvv \
--header "Accept: application/json"
bash generate_user_docs.sh account
bash generate_user_docs.sh workspace