diff --git a/docs.json b/docs.json
index ab53535a..ad9bd846 100644
--- a/docs.json
+++ b/docs.json
@@ -102,7 +102,13 @@
"v1.12.x/deployment/kubernetes/eks/airflow"
]
},
- "v1.12.x/deployment/kubernetes/gke",
+ {
+ "group": "GKE Deployment",
+ "pages": [
+ "v1.12.x/deployment/kubernetes/gke",
+ "v1.12.x/deployment/kubernetes/gke/airflow"
+ ]
+ },
"v1.12.x/deployment/kubernetes/aks",
{
"group": "On-Prem Deployment",
@@ -139,7 +145,13 @@
"v1.12.x/deployment/ingestion/external",
"v1.12.x/deployment/ingestion/external/airflow",
"v1.12.x/deployment/ingestion/external/airflow-docker-virtualenv",
- "v1.12.x/deployment/ingestion/external/mwaa",
+ {
+ "group": "AWS MWAA",
+ "pages": [
+ "v1.12.x/deployment/ingestion/external/mwaa",
+ "v1.12.x/deployment/ingestion/external/mwaa/virtualenv"
+ ]
+ },
"v1.12.x/deployment/ingestion/external/gcp-composer",
"v1.12.x/deployment/ingestion/external/github-actions",
"v1.12.x/deployment/ingestion/external/credentials",
@@ -1413,7 +1425,13 @@
"pages": [
"v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code",
"v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/getting-started",
- "v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ {
+ "group": "Test Runner",
+ "pages": [
+ "v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ "v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets"
+ ]
+ },
"v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation",
"v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation-chunking",
"v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-definitions",
@@ -2325,7 +2343,13 @@
"v1.11.x/deployment/kubernetes/eks/airflow"
]
},
- "v1.11.x/deployment/kubernetes/gke",
+ {
+ "group": "GKE Deployment",
+ "pages": [
+ "v1.11.x/deployment/kubernetes/gke",
+ "v1.11.x/deployment/kubernetes/gke/airflow"
+ ]
+ },
"v1.11.x/deployment/kubernetes/aks",
{
"group": "On-Prem Deployment",
@@ -2356,7 +2380,13 @@
"v1.11.x/deployment/ingestion/external",
"v1.11.x/deployment/ingestion/external/airflow",
"v1.11.x/deployment/ingestion/external/airflow-docker-virtualenv",
- "v1.11.x/deployment/ingestion/external/mwaa",
+ {
+ "group": "AWS MWAA",
+ "pages": [
+ "v1.11.x/deployment/ingestion/external/mwaa",
+ "v1.11.x/deployment/ingestion/external/mwaa/virtualenv"
+ ]
+ },
"v1.11.x/deployment/ingestion/external/gcp-composer",
"v1.11.x/deployment/ingestion/external/github-actions",
"v1.11.x/deployment/ingestion/external/credentials",
@@ -2708,6 +2738,13 @@
"v1.11.x/connectors/database/dynamodb/troubleshooting"
]
},
+ {
+ "group": "Epic",
+ "pages": [
+ "v1.11.x/connectors/database/epic",
+ "v1.11.x/connectors/database/epic/troubleshooting"
+ ]
+ },
{
"group": "Exasol",
"pages": [
@@ -3581,7 +3618,13 @@
"pages": [
"v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code",
"v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/getting-started",
- "v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ {
+ "group": "Test Runner",
+ "pages": [
+ "v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ "v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets"
+ ]
+ },
"v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation",
"v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation-chunking",
"v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-definitions",
@@ -4453,7 +4496,13 @@
"v1.13.x-SNAPSHOT/deployment/kubernetes/eks/airflow"
]
},
- "v1.13.x-SNAPSHOT/deployment/kubernetes/gke",
+ {
+ "group": "GKE Deployment",
+ "pages": [
+ "v1.13.x-SNAPSHOT/deployment/kubernetes/gke",
+ "v1.13.x-SNAPSHOT/deployment/kubernetes/gke/airflow"
+ ]
+ },
"v1.13.x-SNAPSHOT/deployment/kubernetes/aks",
{
"group": "On-Prem Deployment",
@@ -4490,7 +4539,13 @@
"v1.13.x-SNAPSHOT/deployment/ingestion/external",
"v1.13.x-SNAPSHOT/deployment/ingestion/external/airflow",
"v1.13.x-SNAPSHOT/deployment/ingestion/external/airflow-docker-virtualenv",
- "v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa",
+ {
+ "group": "AWS MWAA",
+ "pages": [
+ "v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa",
+ "v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa/virtualenv"
+ ]
+ },
"v1.13.x-SNAPSHOT/deployment/ingestion/external/gcp-composer",
"v1.13.x-SNAPSHOT/deployment/ingestion/external/github-actions",
"v1.13.x-SNAPSHOT/deployment/ingestion/external/credentials",
@@ -5772,7 +5827,13 @@
"pages": [
"v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code",
"v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/getting-started",
- "v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ {
+ "group": "Test Runner",
+ "pages": [
+ "v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner",
+ "v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets"
+ ]
+ },
"v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation",
"v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation-chunking",
"v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-definitions",
diff --git a/v1.11.x/deployment/ingestion/external/mwaa.mdx b/v1.11.x/deployment/ingestion/external/mwaa.mdx
index be48c144..74d77779 100644
--- a/v1.11.x/deployment/ingestion/external/mwaa.mdx
+++ b/v1.11.x/deployment/ingestion/external/mwaa.mdx
@@ -1,7 +1,7 @@
---
title: Run the ingestion from AWS MWAA | Official Documentation
description: Integrate with MWAA to schedule and execute ingestion workflows using managed Airflow on AWS infrastructure.
-sidebarTitle: Mwaa
+sidebarTitle: Overview
collate: false
---
@@ -319,130 +319,6 @@ Moreover, one of the imports will depend on the MWAA Airflow version you are usi
Make sure to update the `ecs_operator_task` task call accordingly.
-## Ingestion Workflows as a Python Virtualenv Operator
-
-### PROs
-
-- Installation does not clash with existing libraries
-- Simpler than ECS
-
-### CONs
-
-- We need to install an additional plugin in MWAA
-- DAGs take longer to run due to needing to set up the virtualenv from scratch for each run.
-
-We need to update the `requirements.txt` file from the MWAA environment to add the following line:
-
-```
-virtualenv
-```
-
-Then, we need to set up a custom plugin in MWAA. Create a file named virtual_python_plugin.py. Note that you may need to update the python version (eg, python3.7 -> python3.10) depending on what your MWAA environment is running.
-```python
-"""
-Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-"""
-from airflow.plugins_manager import AirflowPlugin
-import airflow.utils.python_virtualenv
-from typing import List
-import os
-
-
-def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
- cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
- if system_site_packages:
- cmd.append('--system-site-packages')
- if python_bin is not None:
- cmd.append(f'--python={python_bin}')
- return cmd
-
-
-airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
-
-os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
-
-
-class VirtualPythonPlugin(AirflowPlugin):
- name = 'virtual_python_plugin'
-```
-
-This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
-
-Next, create the plugins.zip file and upload it according to [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
-
-A DAG deployed using the PythonVirtualenvOperator would then look like:
-
-```python
-from datetime import timedelta
-
-from airflow import DAG
-
-from airflow.operators.python import PythonVirtualenvOperator
-
-from airflow.utils.dates import days_ago
-
-
-default_args = {
- "retries": 3,
- "retry_delay": timedelta(seconds=10),
- "execution_timeout": timedelta(minutes=60),
-}
-
-def metadata_ingestion_workflow():
- from metadata.workflow.metadata import MetadataWorkflow
-
-
- import yaml
-
- config = """
-YAML config
- """
- workflow_config = yaml.loads(config)
- workflow = MetadataWorkflow.create(workflow_config)
- workflow.execute()
- workflow.raise_from_status()
- workflow.print_status()
- workflow.stop()
-
-with DAG(
- "redshift_ingestion",
- default_args=default_args,
- description="An example DAG which runs a OpenMetadata ingestion workflow",
- start_date=days_ago(1),
- is_paused_upon_creation=False,
- catchup=False,
-) as dag:
- ingest_task = PythonVirtualenvOperator(
- task_id="ingest_redshift",
- python_callable=metadata_ingestion_workflow,
- requirements=['openmetadata-ingestion==1.0.5.0',
- 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
- 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
- 'watchtower',],
- system_site_packages=False,
- dag=dag,
- )
-```
-
-Where you can update the YAML configuration and workflow classes accordingly. accordingly. Further examples on how to
-run the ingestion can be found on the documentation (e.g., [Snowflake](/v1.11.x/connectors/database/snowflake)).
-
-You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the Openmetadata version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package needs to also be 0.12.2. An example of the extras would look like this `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
-For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow Providers v2.4.3 on Python 3.7, we use that constraints file.
-
-Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
-
-
+
+For the Python VirtualenvOperator approach, see [MWAA with Python VirtualenvOperator](/v1.11.x/deployment/ingestion/external/mwaa/virtualenv).
+
diff --git a/v1.11.x/deployment/ingestion/external/mwaa/virtualenv.mdx b/v1.11.x/deployment/ingestion/external/mwaa/virtualenv.mdx
new file mode 100644
index 00000000..fbcc435f
--- /dev/null
+++ b/v1.11.x/deployment/ingestion/external/mwaa/virtualenv.mdx
@@ -0,0 +1,136 @@
+---
+title: MWAA Ingestion with Python VirtualenvOperator | Official Documentation
+description: Configure the PythonVirtualenvOperator in AWS MWAA for isolated ingestion workflow execution without library conflicts.
+sidebarTitle: Python VirtualenvOperator
+collate: false
+---
+
+import RunConnectorsClass from '/snippets/deployment/run-connectors-class.mdx'
+
+# Ingestion Workflows as a Python Virtualenv Operator
+
+## PROs
+
+- Installation does not clash with existing libraries
+- Simpler than ECS
+
+## CONs
+
+- We need to install an additional plugin in MWAA
+- DAGs take longer to run because the virtualenv must be set up from scratch on each run.
+
+We need to update the `requirements.txt` file of the MWAA environment to add the following line:
+
+```
+virtualenv
+```
+
+Then, we need to set up a custom plugin in MWAA. Create a file named `virtual_python_plugin.py`. Note that you may need to update the Python version (e.g., python3.7 -> python3.10) depending on what your MWAA environment is running.
+```python
+"""
+Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+from airflow.plugins_manager import AirflowPlugin
+import airflow.utils.python_virtualenv
+from typing import List
+import os
+
+
+def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
+ cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
+ if system_site_packages:
+ cmd.append('--system-site-packages')
+ if python_bin is not None:
+ cmd.append(f'--python={python_bin}')
+ return cmd
+
+
+airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
+
+os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
+
+
+class VirtualPythonPlugin(AirflowPlugin):
+ name = 'virtual_python_plugin'
+```
+
+This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
+
+Next, create the plugins.zip file and upload it according to [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
+
+A DAG deployed using the PythonVirtualenvOperator would then look like:
+
+```python
+from datetime import timedelta
+
+from airflow import DAG
+
+from airflow.operators.python import PythonVirtualenvOperator
+
+from airflow.utils.dates import days_ago
+
+
+default_args = {
+ "retries": 3,
+ "retry_delay": timedelta(seconds=10),
+ "execution_timeout": timedelta(minutes=60),
+}
+
+def metadata_ingestion_workflow():
+ from metadata.workflow.metadata import MetadataWorkflow
+
+
+ import yaml
+
+ config = """
+YAML config
+ """
+ workflow_config = yaml.safe_load(config)
+ workflow = MetadataWorkflow.create(workflow_config)
+ workflow.execute()
+ workflow.raise_from_status()
+ workflow.print_status()
+ workflow.stop()
+
+with DAG(
+ "redshift_ingestion",
+ default_args=default_args,
+ description="An example DAG which runs a OpenMetadata ingestion workflow",
+ start_date=days_ago(1),
+ is_paused_upon_creation=False,
+ catchup=False,
+) as dag:
+ ingest_task = PythonVirtualenvOperator(
+ task_id="ingest_redshift",
+ python_callable=metadata_ingestion_workflow,
+ requirements=['openmetadata-ingestion==1.0.5.0',
+ 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
+ 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
+ 'watchtower',],
+ system_site_packages=False,
+ dag=dag,
+ )
+```
+
+You can update the YAML configuration and workflow classes accordingly. Further examples of how to
+run the ingestion can be found in the documentation (e.g., [Snowflake](/v1.11.x/connectors/database/snowflake)).
+
+You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the OpenMetadata ingestion version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package also needs to be 0.12.2. An example with extras would look like `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
+For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow v2.4.3 on Python 3.7, we use that constraints file.
+
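+A minimal sketch of how the operator's `requirements` list might look once extras and providers are pinned. The extras and all version numbers below are illustrative placeholders; match the ingestion version to your server and take provider versions from the matching constraints file:
+
+```python
+# Illustrative only: adjust extras and versions to your server and constraints file.
+requirements = [
+    "openmetadata-ingestion[snowflake]==0.12.2.2",  # extras for the sources you ingest; must match the server version
+    "apache-airflow==2.4.3",
+    "apache-airflow-providers-amazon==6.0.0",  # required for MWAA
+    "watchtower",
+]
+```
+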
+Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
+
+
diff --git a/v1.11.x/deployment/kubernetes/gke.mdx b/v1.11.x/deployment/kubernetes/gke.mdx
index 3a643f0a..5485d872 100644
--- a/v1.11.x/deployment/kubernetes/gke.mdx
+++ b/v1.11.x/deployment/kubernetes/gke.mdx
@@ -1,12 +1,10 @@
---
title: Kubernetes GKE Deployment | Official Documentation
description: Run your deployment on Google Kubernetes Engine (GKE) for a reliable, managed Kubernetes experience with secure configurations.
-sidebarTitle: Gke
+sidebarTitle: Overview
collate: false
---
-import Faqs from '/snippets/deployment/faqs.mdx'
-
# GKE on Google Cloud Platform Deployment
OpenMetadata supports the Installation and Running of Application on Google Kubernetes Engine through Helm Charts.
@@ -115,305 +113,6 @@ Also, disable MySQL and ElasticSearch from OpenMetadata Dependencies Helm Charts
-### Persistent Volumes with ReadWriteMany Access Modes
-
-OpenMetadata helm chart depends on Airflow and Airflow expects a persistent disk that support ReadWriteMany (the volume can be mounted as read-write by many nodes).
-
-The workaround is to create nfs-server disk on Google Kubernetes Engine and use that as the persistent claim and deploy OpenMetadata by implementing the following steps in order.
-
-## Create NFS Share
-
-### Provision GCP Persistent Disk for Google Kubernetes Engine
-
-Run the below command to create a gcloud compute zonal disk. For more information on Google Cloud Disk Options, please visit [here](https://cloud.google.com/compute/docs/disks).
-
-```commandline
-gcloud compute disks create --size=100GB --zone= nfs-disk
-```
-
-### Deploy NFS Server in GKE
-
-### Code Samples
-
-```yaml
-# nfs-server-deployment.yml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: nfs-server
-spec:
- replicas: 1
- selector:
- matchLabels:
- role: nfs-server
- template:
- metadata:
- labels:
- role: nfs-server
- spec:
- initContainers:
- - name: init-airflow-directories
- image: busybox
- command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- containers:
- - name: nfs-server
- image: itsthenetwork/nfs-server-alpine
- env:
- - name: SHARED_DIRECTORY
- value: /exports
- ports:
- - name: nfs
- containerPort: 2049
- securityContext:
- privileged: true
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- volumes:
- - name: nfs-pvc
- gcePersistentDisk:
- pdName: nfs-disk
- fsType: ext4
----
-# nfs-cluster-ip-service.yml
-apiVersion: v1
-kind: Service
-metadata:
- name: nfs-server
-spec:
- ports:
- - name: nfs
- port: 2049
- selector:
- role: nfs-server
-```
-Run the commands below and ensure the pods are running.
-
-```commandline
-kubectl create -f nfs-server-deployment.yml
-kubectl create -f nfs-cluster-ip-service.yml
-```
-
-We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
-
-### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
-
-Update `` with the NFS Service Cluster IP Address for below code snippets.
-You can get the clusterIP using the following command
-
-```commandline
-kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
-```
-
-### Code Samples for PV and PVC for Airflow DAGs
-
-```yaml
-# dags_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-dags-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-dags"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- release: openmetadata-dependencies
- name: openmetadata-dependencies-dags
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f dags_pv_pvc.yml
-```
-
-### Code Samples for PV and PVC for Airflow Logs
-
-```yaml
-# logs_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-logs-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-logs"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- name: openmetadata-dependencies-logs
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f logs_pv_pvc.yml
-```
-
-## Change owner and permission manually on disks
-
-Since airflow pods run as non root users, they would not have write access on the nfs server volumes. In order to fix the permission here, spin up a pod with persistent volumes attached and run it once.
-
-```yaml
-# permissions_pod.yml
-apiVersion: v1
-kind: Pod
-metadata:
- creationTimestamp: null
- labels:
- run: my-permission-pod
- name: my-permission-pod
-spec:
- containers:
- - image: nginx
- name: my-permission-pod
- volumeMounts:
- - name: airflow-dags
- mountPath: /airflow-dags
- - name: airflow-logs
- mountPath: /airflow-logs
- volumes:
- - name: airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- dnsPolicy: ClusterFirst
- restartPolicy: Always
-```
-
-
-
-Airflow runs the pods with linux user name as airflow and linux user id as 50000.
-
-
-
-Run the below command to create the pod and fix the permissions
-
-```commandline
-kubectl create -f permissions_pod.yml
-```
-
-Once the permissions pod is up and running, execute the below commands within the container.
-
-```commandline
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
-# If needed
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
-```
-
-## Create OpenMetadata dependencies Values
-
-Override openmetadata dependencies airflow helm values to bind the nfs persistent volumes for DAGs and logs.
-
-```yaml
-# values-dependencies.yml
-airflow:
- airflow:
- extraVolumeMounts:
- - mountPath: /airflow-logs
- name: nfs-airflow-logs
- - mountPath: /airflow-dags/dags
- name: nfs-airflow-dags
- extraVolumes:
- - name: nfs-airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: nfs-airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- config:
- AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
- dags:
- path: /airflow-dags/dags
- persistence:
- enabled: false
- logs:
- path: /airflow-logs
- persistence:
- enabled: false
-```
-
-
-For more information on airflow helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
-
-When deploying openmeteadata dependencies helm chart, use the below command -
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
-```
-
-
-
-The above command uses configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
-You can modify any configuration and deploy by passing your own `values.yaml`
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values
-```
-
-Once the openmetadata dependencies helm chart deployed, you can then run the below command to install the openmetadata helm chart -
-
-```commandline
-helm install openmetadata open-metadata/openmetadata --values
-```
-
-## Troubleshooting
-
-## Pods are stuck in Pending State due to Persistent Volume Creation Failure
-
-If you came across `invalid access type while creating the pvc`, and the permission pod is stuck in "pending" state.
-
-The above error might have occurred due to the pvc volumes not setup or pvc volumes are not mounted properly.
-
-
-
-
-Please validate:
-- all the prerequisites mentioned in this [section](#prerequisites)
-- the configuration of `dags_pv_pvc.yml` file
-- `storageClassName` field in YAML file
-
-## FAQs
-
-
\ No newline at end of file
+
+For the Airflow orchestrator NFS setup, persistent volume configuration, permissions, and troubleshooting, see the [GKE Airflow Orchestrator](/v1.11.x/deployment/kubernetes/gke/airflow) guide.
+
diff --git a/v1.11.x/deployment/kubernetes/gke/airflow.mdx b/v1.11.x/deployment/kubernetes/gke/airflow.mdx
new file mode 100644
index 00000000..399cae93
--- /dev/null
+++ b/v1.11.x/deployment/kubernetes/gke/airflow.mdx
@@ -0,0 +1,319 @@
+---
+title: GKE with Apache Airflow Orchestrator | Official Documentation
+description: Configure Apache Airflow as the ingestion orchestrator on Google Kubernetes Engine with NFS persistent volumes and ReadWriteMany access.
+sidebarTitle: Airflow Orchestrator
+collate: false
+---
+
+import Faqs from '/snippets/deployment/faqs.mdx'
+
+# GKE with Apache Airflow Orchestrator
+
+
+
+All the code snippets in this section assume the `default` namespace for Kubernetes.
+
+
+
+## Persistent Volumes with ReadWriteMany Access Modes
+
+The OpenMetadata Helm chart depends on Airflow, and Airflow expects a persistent disk that supports ReadWriteMany (the volume can be mounted as read-write by many nodes).
+
+The workaround is to create an NFS server disk on Google Kubernetes Engine, use it as the persistent volume claim, and deploy OpenMetadata by following the steps below in order.
+
+## Create NFS Share
+
+### Provision GCP Persistent Disk for Google Kubernetes Engine
+
+Run the command below to create a gcloud compute zonal disk. For more information on Google Cloud disk options, please refer to the [Google Cloud documentation](https://cloud.google.com/compute/docs/disks).
+
+```commandline
+gcloud compute disks create --size=100GB --zone=<zone-name> nfs-disk
+```
+
+### Deploy NFS Server in GKE
+
+### Code Samples
+
+```yaml
+# nfs-server-deployment.yml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: nfs-server
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ role: nfs-server
+ template:
+ metadata:
+ labels:
+ role: nfs-server
+ spec:
+ initContainers:
+ - name: init-airflow-directories
+ image: busybox
+ command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ containers:
+ - name: nfs-server
+ image: itsthenetwork/nfs-server-alpine
+ env:
+ - name: SHARED_DIRECTORY
+ value: /exports
+ ports:
+ - name: nfs
+ containerPort: 2049
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ volumes:
+ - name: nfs-pvc
+ gcePersistentDisk:
+ pdName: nfs-disk
+ fsType: ext4
+---
+# nfs-cluster-ip-service.yml
+apiVersion: v1
+kind: Service
+metadata:
+ name: nfs-server
+spec:
+ ports:
+ - name: nfs
+ port: 2049
+ selector:
+ role: nfs-server
+```
+Run the commands below and ensure the pods are running.
+
+```commandline
+kubectl create -f nfs-server-deployment.yml
+kubectl create -f nfs-cluster-ip-service.yml
+```
+
+We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
+
+### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
+
+Update the `server` field with the NFS Service Cluster IP Address in the code snippets below.
+You can get the clusterIP using the following command
+
+```commandline
+kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
+```
+
+### Code Samples for PV and PVC for Airflow DAGs
+
+```yaml
+# dags_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-dags-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-dags"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ release: openmetadata-dependencies
+ name: openmetadata-dependencies-dags
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create Persistent Volumes and Persistent Volume claims with the below command.
+
+```commandline
+kubectl create -f dags_pv_pvc.yml
+```
+
+### Code Samples for PV and PVC for Airflow Logs
+
+```yaml
+# logs_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-logs-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-logs"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ name: openmetadata-dependencies-logs
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create Persistent Volumes and Persistent Volume claims with the below command.
+
+```commandline
+kubectl create -f logs_pv_pvc.yml
+```
+
+## Change owner and permission manually on disks
+
+Since Airflow pods run as non-root users, they do not have write access to the NFS server volumes. To fix the permissions, spin up a pod with the persistent volumes attached and run it once.
+
+```yaml
+# permissions_pod.yml
+apiVersion: v1
+kind: Pod
+metadata:
+ creationTimestamp: null
+ labels:
+ run: my-permission-pod
+ name: my-permission-pod
+spec:
+ containers:
+ - image: nginx
+ name: my-permission-pod
+ volumeMounts:
+ - name: airflow-dags
+ mountPath: /airflow-dags
+ - name: airflow-logs
+ mountPath: /airflow-logs
+ volumes:
+ - name: airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ dnsPolicy: ClusterFirst
+ restartPolicy: Always
+```
+
+
+
+Airflow runs the pods with the Linux user name `airflow` and user ID `50000`.
+
+
+
+Run the command below to create the permissions pod.
+
+```commandline
+kubectl create -f permissions_pod.yml
+```
+
+Once the permissions pod is up and running, execute the below commands within the container.
+
+```commandline
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
+# If needed
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
+```
+
+## Create OpenMetadata dependencies Values
+
+Override the openmetadata-dependencies Airflow Helm values to bind the NFS persistent volumes for DAGs and logs.
+
+```yaml
+# values-dependencies.yml
+airflow:
+ airflow:
+ extraVolumeMounts:
+ - mountPath: /airflow-logs
+ name: nfs-airflow-logs
+ - mountPath: /airflow-dags/dags
+ name: nfs-airflow-dags
+ extraVolumes:
+ - name: nfs-airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: nfs-airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ config:
+ AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
+ dags:
+ path: /airflow-dags/dags
+ persistence:
+ enabled: false
+ logs:
+ path: /airflow-logs
+ persistence:
+ enabled: false
+```
+
+
+For more information on Airflow Helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
+
+When deploying the openmetadata-dependencies Helm chart, use the command below:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
+```
+
+
+
+The above command uses configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
+You can modify any configuration and deploy by passing your own `values.yaml`:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values <path-to-values-file>
+```
+
+Once the openmetadata-dependencies Helm chart is deployed, you can then run the command below to install the OpenMetadata Helm chart:
+
+```commandline
+helm install openmetadata open-metadata/openmetadata --values <path-to-values-file>
+```
+
+## Troubleshooting
+
+### Pods are stuck in Pending State due to Persistent Volume Creation Failure
+
+If you come across an `invalid access type` error while creating the PVC, the permissions pod may be stuck in the "Pending" state.
+
+This error usually occurs because the PVC volumes were not set up or are not mounted properly.
+
+
+
+
+Please validate:
+- all the prerequisites mentioned in the [GKE deployment overview](/v1.11.x/deployment/kubernetes/gke)
+- the configuration of `dags_pv_pvc.yml` file
+- `storageClassName` field in YAML file
+
+## FAQs
+
+<Faqs />
diff --git a/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx b/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
index 2ac0a804..556b506f 100644
--- a/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
+++ b/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
@@ -1,7 +1,7 @@
---
title: TestRunner - Running Table-Level Tests
description: Execute data quality tests against tables in OpenMetadata using the TestRunner API
-sidebarTitle: Test Runner
+sidebarTitle: Overview
---
# TestRunner - Running Table-Level Tests
@@ -20,11 +20,10 @@ The `TestRunner` class provides a fluent API for executing data quality tests ag
- [Integration with ETL Workflows](#integration-with-etl-workflows)
- [Error Handling](#error-handling)
- [Best Practices](#best-practices)
-- [Using External Secrets Managers](#using-external-secrets-managers)
- [Next Steps](#next-steps)
-⚠️ If you're using OpenMetadata Cloud to run OpenMetadata, please refer to the section about [External Secrets Managers](#using-external-secrets-managers)
+⚠️ If you're using OpenMetadata Cloud, please refer to [External Secrets Managers](/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets) before using the TestRunner API.
## Overview
@@ -408,178 +407,9 @@ except Exception as e:
5. **Combine table and column tests**: Ensure both structural and content quality
-## Using External Secrets Managers
-
-### Important Note
-
-If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
-
-This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
-
-### Why This is Required
-
-The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
-
-1. Retrieve the service connection configuration from OpenMetadata
-2. Decrypt the credentials stored in your secrets manager
-3. Establish a connection to the data source
-4. Execute the test cases
-
-Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
-
-### General Setup Steps
-
-1. **Contact your OpenMetadata/OpenMetadata administrator** to obtain:
- - The secrets manager type (AWS, Azure, GCP, etc.)
- - The secrets manager loader configuration
- - Required environment variables or configuration files
- - Any additional setup (IAM roles, service principals, etc.)
-
-2. **Install required dependencies** for your secrets manager provider
-
-3. **Configure environment variables** with access credentials
-
-4. **Initialize the SecretsManagerFactory** before using TestRunner
-
-5. **Configure the SDK** and run your tests
-
-### Example using AWS Secrets Manager
-
-**Required Dependencies:**
-```bash
-pip install "openmetadata-ingestion[aws]>=1.11.0.0"
-```
-
-**Example Configuration:**
-```python
-import os
-
-from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
-from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
-from metadata.sdk import configure
-from metadata.sdk.data_quality import TestRunner
-from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
-
-# Set AWS credentials and region
-os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
-os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
-os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
-
-# Initialize secrets manager (must be done before configure())
-SecretsManagerFactory(
- secrets_manager_provider=SecretsManagerProvider.managed_aws,
- secrets_manager_loader=SecretsManagerClientLoader.env,
-)
-
-# Configure OpenMetadata SDK
-configure(
- host="https://your-openmetadata-instance.com/api",
- jwt_token="your-jwt-token",
-)
-
-# Use TestRunner as normal
-runner = TestRunner.for_table("MySQL.production.database.my_table")
-results = runner.run()
-```
-
-### Configuration by Provider
-
-#### AWS and AWS Parameters Store
-
-**OpenMetadata's ingestion extras**: `aws` (e.g `pip install 'openmetadata-ingestion[aws]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.aws`
-- `SecretsManagerProvider.managed_aws`
-- `SecretsManagerProvider.aws_ssm`
-- `SecretsManagerProvider.managed_aws_ssm`
-
-**Environment variables:**
-- `AWS_ACCESS_KEY_ID`
-- `AWS_SECRET_ACCESS_KEY`
-- `AWS_DEFAULT_REGION`
-
-#### Azure Key Vault
-
-**OpenMetadata's ingestion extras**: `azure` (e.g `pip install 'openmetadata-ingestion[azure]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.azure_kv`
-- `SecretsManagerProvider.managed_azure_kv`
-
-**Environment variables:**
-- `AZURE_CLIENT_ID`
-- `AZURE_CLIENT_SECRET`
-- `AZURE_TENANT_ID`
-- `AZURE_KEY_VAULT_NAME`
-
-#### Google Cloud Secret Manager
-
-**OpenMetadata's ingestion extras**: `gcp` (e.g `pip install 'openmetadata-ingestion[gcp]'`)
-
-**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
-
-**Environment variables:**
-- `GOOGLE_APPLICATION_CREDENTIALS`: path to the file with the credentials json file
-- `GCP_PROJECT_ID`
-
-### Troubleshooting
-
-#### Error: "Cannot decrypt service connection"
-
-**Cause**: Secrets manager not initialized or misconfigured
-
-**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
-
-#### Error: "Access Denied" or "Unauthorized"
-
-**Cause**: Insufficient permissions to access secrets
-
-**Solution**:
-- Verify IAM role/service principal has correct permissions
-- Check credentials are valid and not expired
-- Ensure correct region/vault name is specified
-
-#### Error: "Module not found" for secrets manager
-
-**Cause**: Missing dependencies for your secrets manager
-
-**Solution**: Install required extras:
-```bash
-# For AWS
-pip install "openmetadata-ingestion[aws]"
-
-# For Azure
-pip install "openmetadata-ingestion[azure]"
-
-# For GCP
-pip install "openmetadata-ingestion[gcp]"
-```
-
-#### Tests Fail with Connection Errors
-
-**Cause**: Credentials not properly decrypted or secrets manager misconfigured
-
-**Solution**:
-1. Verify secrets manager provider matches your OpenMetadata backend configuration
-2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
-3. Check network connectivity to secrets manager service
-4. Enable debug logging to see detailed error messages:
-
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
-### Contact Your Administrator
-
-If you're unsure about:
-- Which secrets manager your organization uses
-- Required environment variables or configuration
-- Access credentials or IAM roles
-- Permissions needed
-
-**Contact your OpenMetadata or OpenMetadata administrator** for the specific configuration required in your environment.
+
+If your organization uses an external secrets manager (AWS, Azure, GCP), see [External Secrets Managers](/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets) before using the TestRunner API.
+
## Next Steps
diff --git a/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx b/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
new file mode 100644
index 00000000..1cf58669
--- /dev/null
+++ b/v1.11.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
@@ -0,0 +1,182 @@
+---
+title: External Secrets Managers | TestRunner
+description: Configure external secrets managers (AWS, Azure, GCP) for the TestRunner API when your OpenMetadata instance uses an external secrets manager.
+sidebarTitle: External Secrets
+---
+
+# Using External Secrets Managers
+
+## Important Note
+
+If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
+
+This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
+
+
+⚠️ If you're using OpenMetadata Cloud, please refer to this guide to configure your external secrets manager before using the TestRunner API.
+
+
+## Why This is Required
+
+The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
+
+1. Retrieve the service connection configuration from OpenMetadata
+2. Decrypt the credentials stored in your secrets manager
+3. Establish a connection to the data source
+4. Execute the test cases
+
+Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
+
+## General Setup Steps
+
+1. **Contact your OpenMetadata administrator** to obtain:
+ - The secrets manager type (AWS, Azure, GCP, etc.)
+ - The secrets manager loader configuration
+ - Required environment variables or configuration files
+ - Any additional setup (IAM roles, service principals, etc.)
+
+2. **Install required dependencies** for your secrets manager provider
+
+3. **Configure environment variables** with access credentials
+
+4. **Initialize the SecretsManagerFactory** before using TestRunner
+
+5. **Configure the SDK** and run your tests
+
+## Example using AWS Secrets Manager
+
+**Required Dependencies:**
+```bash
+pip install "openmetadata-ingestion[aws]>=1.11.0.0"
+```
+
+**Example Configuration:**
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.sdk import configure
+from metadata.sdk.data_quality import TestRunner
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Set AWS credentials and region
+os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
+os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
+
+# Initialize secrets manager (must be done before configure())
+SecretsManagerFactory(
+ secrets_manager_provider=SecretsManagerProvider.managed_aws,
+ secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+
+# Configure OpenMetadata SDK
+configure(
+ host="https://your-openmetadata-instance.com/api",
+ jwt_token="your-jwt-token",
+)
+
+# Use TestRunner as normal
+runner = TestRunner.for_table("MySQL.production.database.my_table")
+results = runner.run()
+```
+
+## Configuration by Provider
+
+### AWS and AWS Systems Manager Parameter Store
+
+**OpenMetadata's ingestion extras**: `aws` (e.g., `pip install 'openmetadata-ingestion[aws]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.aws`
+- `SecretsManagerProvider.managed_aws`
+- `SecretsManagerProvider.aws_ssm`
+- `SecretsManagerProvider.managed_aws_ssm`
+
+**Environment variables:**
+- `AWS_ACCESS_KEY_ID`
+- `AWS_SECRET_ACCESS_KEY`
+- `AWS_DEFAULT_REGION`
+
+### Azure Key Vault
+
+**OpenMetadata's ingestion extras**: `azure` (e.g., `pip install 'openmetadata-ingestion[azure]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.azure_kv`
+- `SecretsManagerProvider.managed_azure_kv`
+
+**Environment variables:**
+- `AZURE_CLIENT_ID`
+- `AZURE_CLIENT_SECRET`
+- `AZURE_TENANT_ID`
+- `AZURE_KEY_VAULT_NAME`
+
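+A minimal configuration sketch for Azure Key Vault, mirroring the AWS Secrets Manager example above (all credential, host, and table values are placeholders):
+
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.sdk import configure
+from metadata.sdk.data_quality import TestRunner
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Service principal credentials and Key Vault name (placeholders)
+os.environ["AZURE_CLIENT_ID"] = "your-client-id"
+os.environ["AZURE_CLIENT_SECRET"] = "your-client-secret"
+os.environ["AZURE_TENANT_ID"] = "your-tenant-id"
+os.environ["AZURE_KEY_VAULT_NAME"] = "your-key-vault-name"
+
+# Initialize the secrets manager (must be done before configure())
+SecretsManagerFactory(
+    secrets_manager_provider=SecretsManagerProvider.azure_kv,
+    secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+
+# Configure the SDK and run tests as usual
+configure(
+    host="https://your-openmetadata-instance.com/api",
+    jwt_token="your-jwt-token",
+)
+runner = TestRunner.for_table("MySQL.production.database.my_table")
+results = runner.run()
+```
+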
+### Google Cloud Secret Manager
+
+**OpenMetadata's ingestion extras**: `gcp` (e.g., `pip install 'openmetadata-ingestion[gcp]'`)
+
+**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
+
+**Environment variables:**
+- `GOOGLE_APPLICATION_CREDENTIALS`: path to the credentials JSON file
+- `GCP_PROJECT_ID`
+
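+A similar sketch for Google Cloud Secret Manager, again with placeholder values, assuming a service account key file is available locally:
+
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Path to the service account key file and project ID (placeholders)
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
+os.environ["GCP_PROJECT_ID"] = "your-project-id"
+
+# Initialize the secrets manager (must be done before configure())
+SecretsManagerFactory(
+    secrets_manager_provider=SecretsManagerProvider.gcp,
+    secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+```
+
+After this, call `configure()` and use the `TestRunner` exactly as in the AWS example above.
+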
+## Troubleshooting
+
+### Error: "Cannot decrypt service connection"
+
+**Cause**: Secrets manager not initialized or misconfigured
+
+**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
+
+### Error: "Access Denied" or "Unauthorized"
+
+**Cause**: Insufficient permissions to access secrets
+
+**Solution**:
+- Verify IAM role/service principal has correct permissions
+- Check credentials are valid and not expired
+- Ensure correct region/vault name is specified
+
+### Error: "Module not found" for secrets manager
+
+**Cause**: Missing dependencies for your secrets manager
+
+**Solution**: Install required extras:
+```bash
+# For AWS
+pip install "openmetadata-ingestion[aws]"
+
+# For Azure
+pip install "openmetadata-ingestion[azure]"
+
+# For GCP
+pip install "openmetadata-ingestion[gcp]"
+```
+
+### Tests Fail with Connection Errors
+
+**Cause**: Credentials not properly decrypted or secrets manager misconfigured
+
+**Solution**:
+1. Verify secrets manager provider matches your OpenMetadata backend configuration
+2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
+3. Check network connectivity to secrets manager service
+4. Enable debug logging to see detailed error messages:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Contact Your Administrator
+
+If you're unsure about:
+- Which secrets manager your organization uses
+- Required environment variables or configuration
+- Access credentials or IAM roles
+- Permissions needed
+
+**Contact your OpenMetadata administrator** for the specific configuration required in your environment.
diff --git a/v1.12.x/deployment/ingestion/external/mwaa.mdx b/v1.12.x/deployment/ingestion/external/mwaa.mdx
index 6c3c4362..e09aab99 100644
--- a/v1.12.x/deployment/ingestion/external/mwaa.mdx
+++ b/v1.12.x/deployment/ingestion/external/mwaa.mdx
@@ -1,7 +1,7 @@
---
title: Run the ingestion from AWS MWAA | Official Documentation
description: Integrate with MWAA to schedule and execute ingestion workflows using managed Airflow on AWS infrastructure.
-sidebarTitle: Mwaa
+sidebarTitle: Overview
collate: false
---
@@ -319,130 +319,7 @@ Moreover, one of the imports will depend on the MWAA Airflow version you are usi
Make sure to update the `ecs_operator_task` task call accordingly.
-## Ingestion Workflows as a Python Virtualenv Operator
-### PROs
-
-- Installation does not clash with existing libraries
-- Simpler than ECS
-
-### CONs
-
-- We need to install an additional plugin in MWAA
-- DAGs take longer to run due to needing to set up the virtualenv from scratch for each run.
-
-We need to update the `requirements.txt` file from the MWAA environment to add the following line:
-
-```
-virtualenv
-```
-
-Then, we need to set up a custom plugin in MWAA. Create a file named virtual_python_plugin.py. Note that you may need to update the python version (eg, python3.7 -> python3.10) depending on what your MWAA environment is running.
-```python
-"""
-Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-"""
-from airflow.plugins_manager import AirflowPlugin
-import airflow.utils.python_virtualenv
-from typing import List
-import os
-
-
-def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
- cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
- if system_site_packages:
- cmd.append('--system-site-packages')
- if python_bin is not None:
- cmd.append(f'--python={python_bin}')
- return cmd
-
-
-airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
-
-os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
-
-
-class VirtualPythonPlugin(AirflowPlugin):
- name = 'virtual_python_plugin'
-```
-
-This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
-
-Next, create the plugins.zip file and upload it according to [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
-
-A DAG deployed using the PythonVirtualenvOperator would then look like:
-
-```python
-from datetime import timedelta
-
-from airflow import DAG
-
-from airflow.operators.python import PythonVirtualenvOperator
-
-from airflow.utils.dates import days_ago
-
-
-default_args = {
- "retries": 3,
- "retry_delay": timedelta(seconds=10),
- "execution_timeout": timedelta(minutes=60),
-}
-
-def metadata_ingestion_workflow():
- from metadata.workflow.metadata import MetadataWorkflow
-
-
- import yaml
-
- config = """
-YAML config
- """
- workflow_config = yaml.loads(config)
- workflow = MetadataWorkflow.create(workflow_config)
- workflow.execute()
- workflow.raise_from_status()
- workflow.print_status()
- workflow.stop()
-
-with DAG(
- "redshift_ingestion",
- default_args=default_args,
- description="An example DAG which runs a OpenMetadata ingestion workflow",
- start_date=days_ago(1),
- is_paused_upon_creation=False,
- catchup=False,
-) as dag:
- ingest_task = PythonVirtualenvOperator(
- task_id="ingest_redshift",
- python_callable=metadata_ingestion_workflow,
- requirements=['openmetadata-ingestion==1.0.5.0',
- 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
- 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
- 'watchtower',],
- system_site_packages=False,
- dag=dag,
- )
-```
-
-Where you can update the YAML configuration and workflow classes accordingly. accordingly. Further examples on how to
-run the ingestion can be found on the documentation (e.g., [Snowflake](/v1.12.x/connectors/database/snowflake)).
-
-You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the Openmetadata version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package needs to also be 0.12.2. An example of the extras would look like this `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
-For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow Providers v2.4.3 on Python 3.7, we use that constraints file.
-
-Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
-
-
+
+For the Python VirtualenvOperator approach, see [MWAA with Python VirtualenvOperator](/v1.12.x/deployment/ingestion/external/mwaa/virtualenv).
+
diff --git a/v1.12.x/deployment/ingestion/external/mwaa/virtualenv.mdx b/v1.12.x/deployment/ingestion/external/mwaa/virtualenv.mdx
new file mode 100644
index 00000000..12d8c700
--- /dev/null
+++ b/v1.12.x/deployment/ingestion/external/mwaa/virtualenv.mdx
@@ -0,0 +1,136 @@
+---
+title: MWAA Ingestion with Python VirtualenvOperator | Official Documentation
+description: Configure the PythonVirtualenvOperator in AWS MWAA for isolated ingestion workflow execution without library conflicts.
+sidebarTitle: Python VirtualenvOperator
+collate: false
+---
+
+import RunConnectorsClass from '/snippets/deployment/run-connectors-class.mdx'
+
+# Ingestion Workflows as a Python Virtualenv Operator
+
+## PROs
+
+- Installation does not clash with existing libraries
+- Simpler than ECS
+
+## CONs
+
+- We need to install an additional plugin in MWAA
+- DAGs take longer to run because the virtualenv must be set up from scratch on each run.
+
+We need to update the `requirements.txt` file of the MWAA environment to add the following line:
+
+```
+virtualenv
+```
+
+Then, we need to set up a custom plugin in MWAA. Create a file named `virtual_python_plugin.py`. Note that you may need to update the Python version (e.g., python3.7 -> python3.10) depending on what your MWAA environment is running.
+```python
+"""
+Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+from airflow.plugins_manager import AirflowPlugin
+import airflow.utils.python_virtualenv
+from typing import List
+import os
+
+
+def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
+ cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
+ if system_site_packages:
+ cmd.append('--system-site-packages')
+ if python_bin is not None:
+ cmd.append(f'--python={python_bin}')
+ return cmd
+
+
+airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
+
+os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
+
+
+class VirtualPythonPlugin(AirflowPlugin):
+ name = 'virtual_python_plugin'
+```
+
+This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
+
+Next, create the `plugins.zip` file and upload it according to the [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
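+
+As a rough sketch (the S3 bucket name below is a placeholder for your MWAA environment's bucket), packaging and uploading the plugin could look like:
+
+```commandline
+zip plugins.zip virtual_python_plugin.py
+aws s3 cp plugins.zip s3://<your-mwaa-bucket>/plugins.zip
+```
+
+Per the linked AWS guide, disabling lazy plugin loading amounts to setting the `core.lazy_load_plugins` Airflow configuration option to `False` on the MWAA environment.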
+
+A DAG deployed using the PythonVirtualenvOperator would then look like:
+
+```python
+from datetime import timedelta
+
+from airflow import DAG
+
+from airflow.operators.python import PythonVirtualenvOperator
+
+from airflow.utils.dates import days_ago
+
+
+default_args = {
+ "retries": 3,
+ "retry_delay": timedelta(seconds=10),
+ "execution_timeout": timedelta(minutes=60),
+}
+
+def metadata_ingestion_workflow():
+ from metadata.workflow.metadata import MetadataWorkflow
+
+
+ import yaml
+
+ config = """
+YAML config
+ """
+ workflow_config = yaml.safe_load(config)
+ workflow = MetadataWorkflow.create(workflow_config)
+ workflow.execute()
+ workflow.raise_from_status()
+ workflow.print_status()
+ workflow.stop()
+
+with DAG(
+ "redshift_ingestion",
+ default_args=default_args,
+ description="An example DAG which runs a OpenMetadata ingestion workflow",
+ start_date=days_ago(1),
+ is_paused_upon_creation=False,
+ catchup=False,
+) as dag:
+ ingest_task = PythonVirtualenvOperator(
+ task_id="ingest_redshift",
+ python_callable=metadata_ingestion_workflow,
+ requirements=['openmetadata-ingestion==1.0.5.0',
+ 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
+ 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
+ 'watchtower',],
+ system_site_packages=False,
+ dag=dag,
+ )
+```
+
+You can update the YAML configuration and workflow classes accordingly. Further examples on how to
+run the ingestion can be found in the documentation (e.g., [Snowflake](/v1.12.x/connectors/database/snowflake)).
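+
+For illustration only, a minimal metadata ingestion configuration for the Redshift example above might look like the sketch below; the host, credentials, and JWT token are placeholders, and the exact connection options for each source are described in the corresponding connector guide:
+
+```yaml
+source:
+  type: redshift
+  serviceName: aws_redshift
+  serviceConnection:
+    config:
+      type: Redshift
+      hostPort: my-cluster.example.redshift.amazonaws.com:5439
+      username: redshift_user
+      password: redshift_password
+      database: dev
+  sourceConfig:
+    config:
+      type: DatabaseMetadata
+sink:
+  type: metadata-rest
+  config: {}
+workflowConfig:
+  openMetadataServerConfig:
+    hostPort: "https://openmetadata.example.com/api"
+    authProvider: openmetadata
+    securityConfig:
+      jwtToken: "<jwt-token>"
+```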
+
+You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the OpenMetadata ingestion version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package needs to also be 0.12.2. An example of the extras would look like this: `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
+For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow v2.4.3 on Python 3.7, we use the constraints file for that combination.
+
+Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
+
+
diff --git a/v1.12.x/deployment/kubernetes/gke.mdx b/v1.12.x/deployment/kubernetes/gke.mdx
index e553535c..28214f9a 100644
--- a/v1.12.x/deployment/kubernetes/gke.mdx
+++ b/v1.12.x/deployment/kubernetes/gke.mdx
@@ -1,7 +1,7 @@
---
title: Kubernetes GKE Deployment | Official Documentation
description: Run your deployment on Google Kubernetes Engine (GKE) for a reliable, managed Kubernetes experience with secure configurations.
-sidebarTitle: Gke
+sidebarTitle: Overview
collate: false
---
@@ -172,317 +172,7 @@ kubectl get pods
# Navigate to Settings → Preferences → Health
```
----
-
-## Using Airflow Orchestrator (Alternative)
-
-If you prefer to use Apache Airflow as the orchestrator (e.g., for existing Airflow investments or complex DAG requirements), follow the configuration below.
-
-
-Using Airflow requires additional infrastructure: persistent volumes with ReadWriteMany access, the openmetadata-dependencies Helm chart, and more complex configuration.
-
-
-### Persistent Volumes with ReadWriteMany Access Modes
-
-OpenMetadata helm chart depends on Airflow and Airflow expects a persistent disk that support ReadWriteMany (the volume can be mounted as read-write by many nodes).
-
-The workaround is to create nfs-server disk on Google Kubernetes Engine and use that as the persistent claim and deploy OpenMetadata by implementing the following steps in order.
-
-### Create NFS Share
-
-#### Provision GCP Persistent Disk for Google Kubernetes Engine
-
-Run the below command to create a gcloud compute zonal disk. For more information on Google Cloud Disk Options, please visit [here](https://cloud.google.com/compute/docs/disks).
-
-```commandline
-gcloud compute disks create --size=100GB --zone= nfs-disk
-```
-
-#### Deploy NFS Server in GKE
-
-```yaml
-# nfs-server-deployment.yml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: nfs-server
-spec:
- replicas: 1
- selector:
- matchLabels:
- role: nfs-server
- template:
- metadata:
- labels:
- role: nfs-server
- spec:
- initContainers:
- - name: init-airflow-directories
- image: busybox
- command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- containers:
- - name: nfs-server
- image: itsthenetwork/nfs-server-alpine
- env:
- - name: SHARED_DIRECTORY
- value: /exports
- ports:
- - name: nfs
- containerPort: 2049
- securityContext:
- privileged: true
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- volumes:
- - name: nfs-pvc
- gcePersistentDisk:
- pdName: nfs-disk
- fsType: ext4
----
-# nfs-cluster-ip-service.yml
-apiVersion: v1
-kind: Service
-metadata:
- name: nfs-server
-spec:
- ports:
- - name: nfs
- port: 2049
- selector:
- role: nfs-server
-```
-Run the commands below and ensure the pods are running.
-
-```commandline
-kubectl create -f nfs-server-deployment.yml
-kubectl create -f nfs-cluster-ip-service.yml
-```
-
-We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
-
-#### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
-
-Update `` with the NFS Service Cluster IP Address for below code snippets.
-You can get the clusterIP using the following command
-
-```commandline
-kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
-```
-
-#### Code Samples for PV and PVC for Airflow DAGs
-
-```yaml
-# dags_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-dags-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-dags"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- release: openmetadata-dependencies
- name: openmetadata-dependencies-dags
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f dags_pv_pvc.yml
-```
-
-#### Code Samples for PV and PVC for Airflow Logs
-
-```yaml
-# logs_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-logs-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-logs"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- name: openmetadata-dependencies-logs
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f logs_pv_pvc.yml
-```
-### Change owner and permission manually on disks
-
-Since airflow pods run as non root users, they would not have write access on the nfs server volumes. In order to fix the permission here, spin up a pod with persistent volumes attached and run it once.
-
-```yaml
-# permissions_pod.yml
-apiVersion: v1
-kind: Pod
-metadata:
- creationTimestamp: null
- labels:
- run: my-permission-pod
- name: my-permission-pod
-spec:
- containers:
- - image: nginx
- name: my-permission-pod
- volumeMounts:
- - name: airflow-dags
- mountPath: /airflow-dags
- - name: airflow-logs
- mountPath: /airflow-logs
- volumes:
- - name: airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- dnsPolicy: ClusterFirst
- restartPolicy: Always
-```
-
-
-
-Airflow runs the pods with linux user name as airflow and linux user id as 50000.
-
-
-
-Run the below command to create the pod and fix the permissions
-
-```commandline
-kubectl create -f permissions_pod.yml
-```
-
-Once the permissions pod is up and running, execute the below commands within the container.
-
-```commandline
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
-# If needed
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
-```
-
-### Create OpenMetadata dependencies Values
-
-Override openmetadata dependencies airflow helm values to bind the nfs persistent volumes for DAGs and logs.
-
-```yaml
-# values-dependencies.yml
-airflow:
- airflow:
- extraVolumeMounts:
- - mountPath: /airflow-logs
- name: nfs-airflow-logs
- - mountPath: /airflow-dags/dags
- name: nfs-airflow-dags
- extraVolumes:
- - name: nfs-airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: nfs-airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- config:
- AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
- dags:
- path: /airflow-dags/dags
- persistence:
- enabled: false
- logs:
- path: /airflow-logs
- persistence:
- enabled: false
-```
-
-
-For more information on airflow helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
-
-When deploying openmeteadata dependencies helm chart, use the below command -
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
-```
-
-
-
-The above command uses configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
-You can modify any configuration and deploy by passing your own `values.yaml`
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values
-```
-
-Once the openmetadata dependencies helm chart deployed, you can then run the below command to install the openmetadata helm chart -
-
-```commandline
-helm install openmetadata open-metadata/openmetadata --values
-```
-
-Make sure to create CloudSQL and ElasticSearch credentials as Kubernetes Secrets mentioned [here](/v1.12.x/quick-start/local-kubernetes-deployment#2.-create-kubernetes-secrets-required-for-helm-charts).
-
-Also, disable MySQL and ElasticSearch from OpenMetadata Dependencies Helm Charts as mentioned in the FAQs [here](#how-to-disable-mysql-and-elasticsearch-from-openmetadata-dependencies-helm-charts).
-
-## Troubleshooting
-
-### Pods are stuck in Pending State due to Persistent Volume Creation Failure
-
-If you came across `invalid access type while creating the pvc`, and the permission pod is stuck in "pending" state.
-
-The above error might have occurred due to the pvc volumes not setup or pvc volumes are not mounted properly.
-
-
-
-
-Please validate:
-- all the prerequisites mentioned in this [section](#prerequisites)
-- the configuration of `dags_pv_pvc.yml` file
-- `storageClassName` field in YAML file
-
-## FAQs
-
-
\ No newline at end of file
+
+For deployments using Apache Airflow as the orchestrator, see the [GKE Airflow Orchestrator](/v1.12.x/deployment/kubernetes/gke/airflow) guide.
+
\ No newline at end of file
diff --git a/v1.12.x/deployment/kubernetes/gke/airflow.mdx b/v1.12.x/deployment/kubernetes/gke/airflow.mdx
new file mode 100644
index 00000000..ab90ecef
--- /dev/null
+++ b/v1.12.x/deployment/kubernetes/gke/airflow.mdx
@@ -0,0 +1,321 @@
+---
+title: GKE with Apache Airflow Orchestrator | Official Documentation
+description: Configure Apache Airflow as the ingestion orchestrator on Google Kubernetes Engine with NFS persistent volumes and ReadWriteMany access.
+sidebarTitle: Airflow Orchestrator
+collate: false
+---
+
+import Faqs from '/snippets/deployment/faqs.mdx'
+
+# GKE with Apache Airflow Orchestrator
+
+If you prefer to use Apache Airflow as the orchestrator (e.g., for existing Airflow investments or complex DAG requirements), follow the configuration below.
+
+
+Using Airflow requires additional infrastructure: persistent volumes with ReadWriteMany access, the openmetadata-dependencies Helm chart, and more complex configuration.
+
+
+## Persistent Volumes with ReadWriteMany Access Modes
+
+The OpenMetadata Helm chart depends on Airflow, and Airflow expects a persistent disk that supports ReadWriteMany (the volume can be mounted as read-write by many nodes).
+
+The workaround is to create an NFS server disk on Google Kubernetes Engine, use it to back the persistent volume claims, and deploy OpenMetadata by implementing the following steps in order.
+
+### Create NFS Share
+
+#### Provision GCP Persistent Disk for Google Kubernetes Engine
+
+Run the below command to create a gcloud compute zonal disk, replacing `<zone>` with your GCP zone. For more information on Google Cloud disk options, please visit the [Google Cloud documentation](https://cloud.google.com/compute/docs/disks).
+
+```commandline
+gcloud compute disks create --size=100GB --zone=<zone> nfs-disk
+```
+
+#### Deploy NFS Server in GKE
+
+```yaml
+# nfs-server-deployment.yml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: nfs-server
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ role: nfs-server
+ template:
+ metadata:
+ labels:
+ role: nfs-server
+ spec:
+ initContainers:
+ - name: init-airflow-directories
+ image: busybox
+ command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ containers:
+ - name: nfs-server
+ image: itsthenetwork/nfs-server-alpine
+ env:
+ - name: SHARED_DIRECTORY
+ value: /exports
+ ports:
+ - name: nfs
+ containerPort: 2049
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ volumes:
+ - name: nfs-pvc
+ gcePersistentDisk:
+ pdName: nfs-disk
+ fsType: ext4
+---
+# nfs-cluster-ip-service.yml
+apiVersion: v1
+kind: Service
+metadata:
+ name: nfs-server
+spec:
+ ports:
+ - name: nfs
+ port: 2049
+ selector:
+ role: nfs-server
+```
+Run the commands below and ensure the pods are running.
+
+```commandline
+kubectl create -f nfs-server-deployment.yml
+kubectl create -f nfs-cluster-ip-service.yml
+```
+
+We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
+
+#### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
+
+Update the empty `server:` field in the code snippets below with the NFS Service Cluster IP address.
+You can get the cluster IP using the following command:
+
+```commandline
+kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
+```
+
+#### Code Samples for PV and PVC for Airflow DAGs
+
+```yaml
+# dags_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-dags-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-dags"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ release: openmetadata-dependencies
+ name: openmetadata-dependencies-dags
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create the Persistent Volume and Persistent Volume Claim with the below command.
+
+```commandline
+kubectl create -f dags_pv_pvc.yml
+```
+
+#### Code Samples for PV and PVC for Airflow Logs
+
+```yaml
+# logs_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-logs-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-logs"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ name: openmetadata-dependencies-logs
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create the Persistent Volume and Persistent Volume Claim with the below command.
+
+```commandline
+kubectl create -f logs_pv_pvc.yml
+```
+
+### Change owner and permission manually on disks
+
+Since Airflow pods run as non-root users, they do not have write access to the NFS server volumes. To fix the permissions, spin up a pod with the persistent volumes attached and run it once.
+
+```yaml
+# permissions_pod.yml
+apiVersion: v1
+kind: Pod
+metadata:
+ creationTimestamp: null
+ labels:
+ run: my-permission-pod
+ name: my-permission-pod
+spec:
+ containers:
+ - image: nginx
+ name: my-permission-pod
+ volumeMounts:
+ - name: airflow-dags
+ mountPath: /airflow-dags
+ - name: airflow-logs
+ mountPath: /airflow-logs
+ volumes:
+ - name: airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ dnsPolicy: ClusterFirst
+ restartPolicy: Always
+```
+
+
+
+Airflow runs its pods with the Linux user name `airflow` and Linux user ID `50000`.
+
+
+
+Run the below command to create the pod that will be used to fix the permissions:
+
+```commandline
+kubectl create -f permissions_pod.yml
+```
+
+Once the permissions pod is up and running, execute the below commands to change the ownership and permissions within the container.
+
+```commandline
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
+# If needed
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
+```
+
+### Create OpenMetadata dependencies Values
+
+Override the openmetadata-dependencies Airflow Helm values to bind the NFS persistent volumes for DAGs and logs.
+
+```yaml
+# values-dependencies.yml
+airflow:
+ airflow:
+ extraVolumeMounts:
+ - mountPath: /airflow-logs
+ name: nfs-airflow-logs
+ - mountPath: /airflow-dags/dags
+ name: nfs-airflow-dags
+ extraVolumes:
+ - name: nfs-airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: nfs-airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ config:
+ AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
+ dags:
+ path: /airflow-dags/dags
+ persistence:
+ enabled: false
+ logs:
+ path: /airflow-logs
+ persistence:
+ enabled: false
+```
+
+
+For more information on Airflow Helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
+
+When deploying the openmetadata-dependencies Helm chart, use the below command:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
+```
+
+
+
+The above command uses configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
+You can modify any configuration and deploy by passing your own `values.yaml`:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values <path-to-your-values.yaml>
+```
+
+Once the openmetadata-dependencies Helm chart is deployed, you can run the below command to install the openmetadata Helm chart:
+
+```commandline
+helm install openmetadata open-metadata/openmetadata --values <path-to-your-values.yaml>
+```
+
+Make sure to create CloudSQL and ElasticSearch credentials as Kubernetes Secrets mentioned [here](/v1.12.x/quick-start/local-kubernetes-deployment#2.-create-kubernetes-secrets-required-for-helm-charts).
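+
+As a sketch only, creating those secrets could look like the commands below; the secret names and keys shown here are assumptions based on the default Helm values, so check the linked quick-start for the exact names your chart version expects:
+
+```commandline
+kubectl create secret generic mysql-secrets \
+    --from-literal=openmetadata-mysql-password=<your-cloudsql-password>
+kubectl create secret generic elasticsearch-secrets \
+    --from-literal=openmetadata-elasticsearch-password=<your-elasticsearch-password>
+```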
+
+Also, disable MySQL and ElasticSearch from OpenMetadata Dependencies Helm Charts as mentioned in the FAQs [here](#how-to-disable-mysql-and-elasticsearch-from-openmetadata-dependencies-helm-charts).
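+
+A minimal sketch of that override, added to the same `values-dependencies.yml` (the exact keys are defined in the chart's default values linked above):
+
+```yaml
+mysql:
+  enabled: false
+elasticsearch:
+  enabled: false
+```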
+
+## Troubleshooting
+
+### Pods are stuck in Pending State due to Persistent Volume Creation Failure
+
+You may come across an `invalid access type` error while creating the PVC, with the permissions pod stuck in the "Pending" state.
+
+This error typically occurs when the PVC volumes are not set up or are not mounted properly.
+
+
+
+
+Please validate:
+- all the prerequisites mentioned in the [GKE deployment overview](/v1.12.x/deployment/kubernetes/gke)
+- the configuration of `dags_pv_pvc.yml` file
+- the `storageClassName` field in the YAML files
+
+## FAQs
+
+
diff --git a/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx b/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
index 810e1ed4..dc1ea5fb 100644
--- a/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
+++ b/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
@@ -1,7 +1,7 @@
---
title: TestRunner - Running Table-Level Tests
description: Execute data quality tests against tables in OpenMetadata using the TestRunner API
-sidebarTitle: Test Runner
+sidebarTitle: Overview
---
# TestRunner - Running Table-Level Tests
@@ -20,7 +20,6 @@ The `TestRunner` class provides a fluent API for executing data quality tests ag
- [Integration with ETL Workflows](#integration-with-etl-workflows)
- [Error Handling](#error-handling)
- [Best Practices](#best-practices)
-- [Using External Secrets Managers](#using-external-secrets-managers)
- [Next Steps](#next-steps)
@@ -408,178 +407,10 @@ except Exception as e:
5. **Combine table and column tests**: Ensure both structural and content quality
-## Using External Secrets Managers
-### Important Note
-
-If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
-
-This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
-
-### Why This is Required
-
-The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
-
-1. Retrieve the service connection configuration from OpenMetadata
-2. Decrypt the credentials stored in your secrets manager
-3. Establish a connection to the data source
-4. Execute the test cases
-
-Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
-
-### General Setup Steps
-
-1. **Contact your OpenMetadata/OpenMetadata administrator** to obtain:
- - The secrets manager type (AWS, Azure, GCP, etc.)
- - The secrets manager loader configuration
- - Required environment variables or configuration files
- - Any additional setup (IAM roles, service principals, etc.)
-
-2. **Install required dependencies** for your secrets manager provider
-
-3. **Configure environment variables** with access credentials
-
-4. **Initialize the SecretsManagerFactory** before using TestRunner
-
-5. **Configure the SDK** and run your tests
-
-### Example using AWS Secrets Manager
-
-**Required Dependencies:**
-```bash
-pip install "openmetadata-ingestion[aws]>=1.12.0.0"
-```
-
-**Example Configuration:**
-```python
-import os
-
-from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
-from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
-from metadata.sdk import configure
-from metadata.sdk.data_quality import TestRunner
-from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
-
-# Set AWS credentials and region
-os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
-os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
-os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
-
-# Initialize secrets manager (must be done before configure())
-SecretsManagerFactory(
- secrets_manager_provider=SecretsManagerProvider.managed_aws,
- secrets_manager_loader=SecretsManagerClientLoader.env,
-)
-
-# Configure OpenMetadata SDK
-configure(
- host="https://your-openmetadata-instance.com/api",
- jwt_token="your-jwt-token",
-)
-
-# Use TestRunner as normal
-runner = TestRunner.for_table("MySQL.production.database.my_table")
-results = runner.run()
-```
-
-### Configuration by Provider
-
-#### AWS and AWS Parameters Store
-
-**OpenMetadata's ingestion extras**: `aws` (e.g `pip install 'openmetadata-ingestion[aws]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.aws`
-- `SecretsManagerProvider.managed_aws`
-- `SecretsManagerProvider.aws_ssm`
-- `SecretsManagerProvider.managed_aws_ssm`
-
-**Environment variables:**
-- `AWS_ACCESS_KEY_ID`
-- `AWS_SECRET_ACCESS_KEY`
-- `AWS_DEFAULT_REGION`
-
-#### Azure Key Vault
-
-**OpenMetadata's ingestion extras**: `azure` (e.g `pip install 'openmetadata-ingestion[azure]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.azure_kv`
-- `SecretsManagerProvider.managed_azure_kv`
-
-**Environment variables:**
-- `AZURE_CLIENT_ID`
-- `AZURE_CLIENT_SECRET`
-- `AZURE_TENANT_ID`
-- `AZURE_KEY_VAULT_NAME`
-
-#### Google Cloud Secret Manager
-
-**OpenMetadata's ingestion extras**: `gcp` (e.g `pip install 'openmetadata-ingestion[gcp]'`)
-
-**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
-
-**Environment variables:**
-- `GOOGLE_APPLICATION_CREDENTIALS`: path to the file with the credentials json file
-- `GCP_PROJECT_ID`
-
-### Troubleshooting
-
-#### Error: "Cannot decrypt service connection"
-
-**Cause**: Secrets manager not initialized or misconfigured
-
-**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
-
-#### Error: "Access Denied" or "Unauthorized"
-
-**Cause**: Insufficient permissions to access secrets
-
-**Solution**:
-- Verify IAM role/service principal has correct permissions
-- Check credentials are valid and not expired
-- Ensure correct region/vault name is specified
-
-#### Error: "Module not found" for secrets manager
-
-**Cause**: Missing dependencies for your secrets manager
-
-**Solution**: Install required extras:
-```bash
-# For AWS
-pip install "openmetadata-ingestion[aws]"
-
-# For Azure
-pip install "openmetadata-ingestion[azure]"
-
-# For GCP
-pip install "openmetadata-ingestion[gcp]"
-```
-
-#### Tests Fail with Connection Errors
-
-**Cause**: Credentials not properly decrypted or secrets manager misconfigured
-
-**Solution**:
-1. Verify secrets manager provider matches your OpenMetadata backend configuration
-2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
-3. Check network connectivity to secrets manager service
-4. Enable debug logging to see detailed error messages:
-
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
-### Contact Your Administrator
-
-If you're unsure about:
-- Which secrets manager your organization uses
-- Required environment variables or configuration
-- Access credentials or IAM roles
-- Permissions needed
-
-**Contact your OpenMetadata or OpenMetadata administrator** for the specific configuration required in your environment.
+
+If your organization uses an external secrets manager (AWS, Azure, GCP), see [External Secrets Managers](/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets) before using the TestRunner API.
+
## Next Steps
diff --git a/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx b/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
new file mode 100644
index 00000000..3e7b70fa
--- /dev/null
+++ b/v1.12.x/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
@@ -0,0 +1,182 @@
+---
+title: External Secrets Managers | TestRunner
+description: Configure external secrets managers (AWS, Azure, GCP) for the TestRunner API when your OpenMetadata instance uses an external secrets manager.
+sidebarTitle: External Secrets
+---
+
+# Using External Secrets Managers
+
+## Important Note
+
+If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
+
+This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
+
+
+⚠️ If you're using OpenMetadata Cloud, please refer to this guide to configure your external secrets manager before using the TestRunner API.
+
+
+## Why This is Required
+
+The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
+
+1. Retrieve the service connection configuration from OpenMetadata
+2. Decrypt the credentials stored in your secrets manager
+3. Establish a connection to the data source
+4. Execute the test cases
+
+Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
+
+## General Setup Steps
+
+1. **Contact your OpenMetadata administrator** to obtain:
+ - The secrets manager type (AWS, Azure, GCP, etc.)
+ - The secrets manager loader configuration
+ - Required environment variables or configuration files
+ - Any additional setup (IAM roles, service principals, etc.)
+
+2. **Install required dependencies** for your secrets manager provider
+
+3. **Configure environment variables** with access credentials
+
+4. **Initialize the SecretsManagerFactory** before using TestRunner
+
+5. **Configure the SDK** and run your tests
+
+## Example using AWS Secrets Manager
+
+**Required Dependencies:**
+```bash
+pip install "openmetadata-ingestion[aws]>=1.12.0.0"
+```
+
+**Example Configuration:**
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.sdk import configure
+from metadata.sdk.data_quality import TestRunner
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Set AWS credentials and region
+os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
+os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
+
+# Initialize secrets manager (must be done before configure())
+SecretsManagerFactory(
+ secrets_manager_provider=SecretsManagerProvider.managed_aws,
+ secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+
+# Configure OpenMetadata SDK
+configure(
+ host="https://your-openmetadata-instance.com/api",
+ jwt_token="your-jwt-token",
+)
+
+# Use TestRunner as normal
+runner = TestRunner.for_table("MySQL.production.database.my_table")
+results = runner.run()
+```
+
+## Configuration by Provider
+
+### AWS and AWS Systems Manager Parameter Store
+
+**OpenMetadata's ingestion extras**: `aws` (e.g., `pip install 'openmetadata-ingestion[aws]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.aws`
+- `SecretsManagerProvider.managed_aws`
+- `SecretsManagerProvider.aws_ssm`
+- `SecretsManagerProvider.managed_aws_ssm`
+
+**Environment variables:**
+- `AWS_ACCESS_KEY_ID`
+- `AWS_SECRET_ACCESS_KEY`
+- `AWS_DEFAULT_REGION`
+
+### Azure Key Vault
+
+**OpenMetadata's ingestion extras**: `azure` (e.g., `pip install 'openmetadata-ingestion[azure]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.azure_kv`
+- `SecretsManagerProvider.managed_azure_kv`
+
+**Environment variables:**
+- `AZURE_CLIENT_ID`
+- `AZURE_CLIENT_SECRET`
+- `AZURE_TENANT_ID`
+- `AZURE_KEY_VAULT_NAME`
+
+### Google Cloud Secret Manager
+
+**OpenMetadata's ingestion extras**: `gcp` (e.g., `pip install 'openmetadata-ingestion[gcp]'`)
+
+**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
+
+**Environment variables:**
+- `GOOGLE_APPLICATION_CREDENTIALS`: path to the credentials JSON file
+- `GCP_PROJECT_ID`
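+
+As a sketch, a GCP configuration analogous to the AWS example above might look like this (the credentials path, project ID, host, token, and table FQN are placeholders):
+
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.sdk import configure
+from metadata.sdk.data_quality import TestRunner
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Point the Google client libraries at your service account key and project
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
+os.environ["GCP_PROJECT_ID"] = "your-gcp-project-id"
+
+# Initialize the secrets manager (must be done before configure())
+SecretsManagerFactory(
+    secrets_manager_provider=SecretsManagerProvider.gcp,
+    secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+
+# Configure the OpenMetadata SDK
+configure(
+    host="https://your-openmetadata-instance.com/api",
+    jwt_token="your-jwt-token",
+)
+
+# Use TestRunner as normal
+runner = TestRunner.for_table("MySQL.production.database.my_table")
+results = runner.run()
+```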
+
+## Troubleshooting
+
+### Error: "Cannot decrypt service connection"
+
+**Cause**: Secrets manager not initialized or misconfigured
+
+**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
+
+### Error: "Access Denied" or "Unauthorized"
+
+**Cause**: Insufficient permissions to access secrets
+
+**Solution**:
+- Verify IAM role/service principal has correct permissions
+- Check credentials are valid and not expired
+- Ensure correct region/vault name is specified
+
+### Error: "Module not found" for secrets manager
+
+**Cause**: Missing dependencies for your secrets manager
+
+**Solution**: Install required extras:
+```bash
+# For AWS
+pip install "openmetadata-ingestion[aws]"
+
+# For Azure
+pip install "openmetadata-ingestion[azure]"
+
+# For GCP
+pip install "openmetadata-ingestion[gcp]"
+```
+
+### Tests Fail with Connection Errors
+
+**Cause**: Credentials not properly decrypted or secrets manager misconfigured
+
+**Solution**:
+1. Verify secrets manager provider matches your OpenMetadata backend configuration
+2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
+3. Check network connectivity to secrets manager service
+4. Enable debug logging to see detailed error messages:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Contact Your Administrator
+
+If you're unsure about:
+- Which secrets manager your organization uses
+- Required environment variables or configuration
+- Access credentials or IAM roles
+- Permissions needed
+
+**Contact your OpenMetadata administrator** for the specific configuration required in your environment.
diff --git a/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa.mdx b/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa.mdx
index 42f2569a..d3290f25 100644
--- a/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa.mdx
+++ b/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa.mdx
@@ -1,7 +1,7 @@
---
title: Run the ingestion from AWS MWAA | Official Documentation
description: Integrate with MWAA to schedule and execute ingestion workflows using managed Airflow on AWS infrastructure.
-sidebarTitle: Mwaa
+sidebarTitle: Overview
collate: false
---
@@ -319,130 +319,6 @@ Moreover, one of the imports will depend on the MWAA Airflow version you are usi
Make sure to update the `ecs_operator_task` task call accordingly.
-## Ingestion Workflows as a Python Virtualenv Operator
-
-### PROs
-
-- Installation does not clash with existing libraries
-- Simpler than ECS
-
-### CONs
-
-- We need to install an additional plugin in MWAA
-- DAGs take longer to run due to needing to set up the virtualenv from scratch for each run.
-
-We need to update the `requirements.txt` file from the MWAA environment to add the following line:
-
-```
-virtualenv
-```
-
-Then, we need to set up a custom plugin in MWAA. Create a file named virtual_python_plugin.py. Note that you may need to update the python version (eg, python3.7 -> python3.10) depending on what your MWAA environment is running.
-```python
-"""
-Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-"""
-from airflow.plugins_manager import AirflowPlugin
-import airflow.utils.python_virtualenv
-from typing import List
-import os
-
-
-def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
- cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
- if system_site_packages:
- cmd.append('--system-site-packages')
- if python_bin is not None:
- cmd.append(f'--python={python_bin}')
- return cmd
-
-
-airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
-
-os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
-
-
-class VirtualPythonPlugin(AirflowPlugin):
- name = 'virtual_python_plugin'
-```
-
-This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
-
-Next, create the plugins.zip file and upload it according to [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
-
-A DAG deployed using the PythonVirtualenvOperator would then look like:
-
-```python
-from datetime import timedelta
-
-from airflow import DAG
-
-from airflow.operators.python import PythonVirtualenvOperator
-
-from airflow.utils.dates import days_ago
-
-
-default_args = {
- "retries": 3,
- "retry_delay": timedelta(seconds=10),
- "execution_timeout": timedelta(minutes=60),
-}
-
-def metadata_ingestion_workflow():
- from metadata.workflow.metadata import MetadataWorkflow
-
-
- import yaml
-
- config = """
-YAML config
- """
- workflow_config = yaml.loads(config)
- workflow = MetadataWorkflow.create(workflow_config)
- workflow.execute()
- workflow.raise_from_status()
- workflow.print_status()
- workflow.stop()
-
-with DAG(
- "redshift_ingestion",
- default_args=default_args,
- description="An example DAG which runs a OpenMetadata ingestion workflow",
- start_date=days_ago(1),
- is_paused_upon_creation=False,
- catchup=False,
-) as dag:
- ingest_task = PythonVirtualenvOperator(
- task_id="ingest_redshift",
- python_callable=metadata_ingestion_workflow,
- requirements=['openmetadata-ingestion==1.0.5.0',
- 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
- 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
- 'watchtower',],
- system_site_packages=False,
- dag=dag,
- )
-```
-
-Where you can update the YAML configuration and workflow classes accordingly. accordingly. Further examples on how to
-run the ingestion can be found on the documentation (e.g., [Snowflake](/v1.13.x-SNAPSHOT/connectors/database/snowflake)).
-
-You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the Openmetadata version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package needs to also be 0.12.2. An example of the extras would look like this `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
-For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow Providers v2.4.3 on Python 3.7, we use that constraints file.
-
-Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
-
-
+
+For the Python VirtualenvOperator approach, see [MWAA with Python VirtualenvOperator](/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa/virtualenv).
+
diff --git a/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa/virtualenv.mdx b/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa/virtualenv.mdx
new file mode 100644
index 00000000..782b0679
--- /dev/null
+++ b/v1.13.x-SNAPSHOT/deployment/ingestion/external/mwaa/virtualenv.mdx
@@ -0,0 +1,136 @@
+---
+title: MWAA Ingestion with Python VirtualenvOperator | Official Documentation
+description: Configure the PythonVirtualenvOperator in AWS MWAA for isolated ingestion workflow execution without library conflicts.
+sidebarTitle: Python VirtualenvOperator
+collate: false
+---
+
+import RunConnectorsClass from '/snippets/deployment/run-connectors-class.mdx'
+
+# Ingestion Workflows as a Python Virtualenv Operator
+
+## PROs
+
+- Installation does not clash with existing libraries
+- Simpler than ECS
+
+## CONs
+
+- We need to install an additional plugin in MWAA
+- DAGs take longer to run due to needing to set up the virtualenv from scratch for each run.
+
+We need to update the `requirements.txt` file from the MWAA environment to add the following line:
+
+```
+virtualenv
+```
+
+Then, we need to set up a custom plugin in MWAA. Create a file named `virtual_python_plugin.py`. Note that you may need to update the Python version (e.g., `python3.7` -> `python3.10`) depending on the version your MWAA environment is running.
+```python
+"""
+Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+from airflow.plugins_manager import AirflowPlugin
+import airflow.utils.python_virtualenv
+from typing import List
+import os
+
+
+def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool) -> List[str]:
+ cmd = ['python3', '/usr/local/airflow/.local/lib/python3.7/site-packages/virtualenv', tmp_dir]
+ if system_site_packages:
+ cmd.append('--system-site-packages')
+ if python_bin is not None:
+ cmd.append(f'--python={python_bin}')
+ return cmd
+
+
+airflow.utils.python_virtualenv._generate_virtualenv_cmd = _generate_virtualenv_cmd
+
+os.environ["PATH"] = f"/usr/local/airflow/.local/bin:{os.environ['PATH']}"
+
+
+class VirtualPythonPlugin(AirflowPlugin):
+ name = 'virtual_python_plugin'
+```
+
+This is modified from the [AWS sample](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html).
+
+Next, create the `plugins.zip` file and upload it according to the [AWS docs](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-dag-import-plugins.html). You will also need to [disable lazy plugin loading in MWAA](https://docs.aws.amazon.com/mwaa/latest/userguide/samples-virtualenv.html#samples-virtualenv-airflow-config).
+
+A DAG deployed using the PythonVirtualenvOperator would then look like:
+
+```python
+from datetime import timedelta
+
+from airflow import DAG
+
+from airflow.operators.python import PythonVirtualenvOperator
+
+from airflow.utils.dates import days_ago
+
+
+default_args = {
+ "retries": 3,
+ "retry_delay": timedelta(seconds=10),
+ "execution_timeout": timedelta(minutes=60),
+}
+
+def metadata_ingestion_workflow():
+ from metadata.workflow.metadata import MetadataWorkflow
+
+
+ import yaml
+
+ config = """
+YAML config
+ """
+ workflow_config = yaml.safe_load(config)
+ workflow = MetadataWorkflow.create(workflow_config)
+ workflow.execute()
+ workflow.raise_from_status()
+ workflow.print_status()
+ workflow.stop()
+
+with DAG(
+ "redshift_ingestion",
+ default_args=default_args,
+ description="An example DAG which runs a OpenMetadata ingestion workflow",
+ start_date=days_ago(1),
+ is_paused_upon_creation=False,
+ catchup=False,
+) as dag:
+ ingest_task = PythonVirtualenvOperator(
+ task_id="ingest_redshift",
+ python_callable=metadata_ingestion_workflow,
+ requirements=['openmetadata-ingestion==1.0.5.0',
+ 'apache-airflow==2.4.3', # note, v2.4.3 is the first version that does not conflict with OpenMetadata's 'tabulate' requirements
+ 'apache-airflow-providers-amazon==6.0.0', # Amazon Airflow provider is necessary for MWAA
+ 'watchtower',],
+ system_site_packages=False,
+ dag=dag,
+ )
+```
+
+You can update the YAML configuration and workflow classes accordingly. Further examples on how to
+run the ingestion can be found in the documentation (e.g., [Snowflake](/v1.13.x-SNAPSHOT/connectors/database/snowflake)).
+
+You will also need to determine the OpenMetadata ingestion extras and Airflow providers you need. Note that the OpenMetadata ingestion version needs to match the server version. If we are using the server at 0.12.2, then the ingestion package needs to also be 0.12.2. An example of the extras would look like this: `openmetadata-ingestion[mysql,snowflake,s3]==0.12.2.2`.
+For Airflow providers, you will want to pull the provider versions from [the matching constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-3.7.txt). Since this example installs Airflow v2.4.3 on Python 3.7, we use the constraints file for that combination.
+
+Also note that the ingestion workflow function must be entirely self-contained as it will run by itself in the virtualenv. Any imports it needs, including the configuration, must exist within the function itself.
+
+
diff --git a/v1.13.x-SNAPSHOT/deployment/kubernetes/gke.mdx b/v1.13.x-SNAPSHOT/deployment/kubernetes/gke.mdx
index 6269f850..eb9be88d 100644
--- a/v1.13.x-SNAPSHOT/deployment/kubernetes/gke.mdx
+++ b/v1.13.x-SNAPSHOT/deployment/kubernetes/gke.mdx
@@ -1,7 +1,7 @@
---
title: Kubernetes GKE Deployment | Official Documentation
description: Run your deployment on Google Kubernetes Engine (GKE) for a reliable, managed Kubernetes experience with secure configurations.
-sidebarTitle: Gke
+sidebarTitle: Overview
collate: false
---
@@ -172,317 +172,6 @@ kubectl get pods
# Navigate to Settings → Preferences → Health
```
----
-
-## Using Airflow Orchestrator (Alternative)
-
-If you prefer to use Apache Airflow as the orchestrator (e.g., for existing Airflow investments or complex DAG requirements), follow the configuration below.
-
-
-Using Airflow requires additional infrastructure: persistent volumes with ReadWriteMany access, the openmetadata-dependencies Helm chart, and more complex configuration.
-
-
-### Persistent Volumes with ReadWriteMany Access Modes
-
-OpenMetadata helm chart depends on Airflow and Airflow expects a persistent disk that support ReadWriteMany (the volume can be mounted as read-write by many nodes).
-
-The workaround is to create nfs-server disk on Google Kubernetes Engine and use that as the persistent claim and deploy OpenMetadata by implementing the following steps in order.
-
-### Create NFS Share
-
-#### Provision GCP Persistent Disk for Google Kubernetes Engine
-
-Run the below command to create a gcloud compute zonal disk. For more information on Google Cloud Disk Options, please visit [here](https://cloud.google.com/compute/docs/disks).
-
-```commandline
-gcloud compute disks create --size=100GB --zone= nfs-disk
-```
-
-#### Deploy NFS Server in GKE
-
-```yaml
-# nfs-server-deployment.yml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
- name: nfs-server
-spec:
- replicas: 1
- selector:
- matchLabels:
- role: nfs-server
- template:
- metadata:
- labels:
- role: nfs-server
- spec:
- initContainers:
- - name: init-airflow-directories
- image: busybox
- command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- containers:
- - name: nfs-server
- image: itsthenetwork/nfs-server-alpine
- env:
- - name: SHARED_DIRECTORY
- value: /exports
- ports:
- - name: nfs
- containerPort: 2049
- securityContext:
- privileged: true
- volumeMounts:
- - mountPath: /exports
- name: nfs-pvc
- volumes:
- - name: nfs-pvc
- gcePersistentDisk:
- pdName: nfs-disk
- fsType: ext4
----
-# nfs-cluster-ip-service.yml
-apiVersion: v1
-kind: Service
-metadata:
- name: nfs-server
-spec:
- ports:
- - name: nfs
- port: 2049
- selector:
- role: nfs-server
-```
-Run the commands below and ensure the pods are running.
-
-```commandline
-kubectl create -f nfs-server-deployment.yml
-kubectl create -f nfs-cluster-ip-service.yml
-```
-
-We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
-
-#### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
-
-Update `` with the NFS Service Cluster IP Address for below code snippets.
-You can get the clusterIP using the following command
-
-```commandline
-kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
-```
-
-#### Code Samples for PV and PVC for Airflow DAGs
-
-```yaml
-# dags_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-dags-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-dags"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- release: openmetadata-dependencies
- name: openmetadata-dependencies-dags
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f dags_pv_pvc.yml
-```
-
-#### Code Samples for PV and PVC for Airflow Logs
-
-```yaml
-# logs_pv_pvc.yml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
- name: openmetadata-dependencies-logs-pv
-spec:
- capacity:
- storage: 10Gi
- accessModes:
- - ReadWriteMany
- nfs:
- server:
- path: "/airflow-logs"
-
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
- labels:
- app: airflow
- name: openmetadata-dependencies-logs
- namespace: default
-spec:
- accessModes:
- - ReadWriteMany
- resources:
- requests:
- storage: 10Gi
- storageClassName: ""
-```
-
-Create Persistent Volumes and Persistent Volume claims with the below command.
-
-```commandline
-kubectl create -f logs_pv_pvc.yml
-```
-
-### Change owner and permission manually on disks
-
-Since airflow pods run as non root users, they would not have write access on the nfs server volumes. In order to fix the permission here, spin up a pod with persistent volumes attached and run it once.
-
-```yaml
-# permissions_pod.yml
-apiVersion: v1
-kind: Pod
-metadata:
- creationTimestamp: null
- labels:
- run: my-permission-pod
- name: my-permission-pod
-spec:
- containers:
- - image: nginx
- name: my-permission-pod
- volumeMounts:
- - name: airflow-dags
- mountPath: /airflow-dags
- - name: airflow-logs
- mountPath: /airflow-logs
- volumes:
- - name: airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- dnsPolicy: ClusterFirst
- restartPolicy: Always
-```
-
-
-
-Airflow runs the pods with linux user name as airflow and linux user id as 50000.
-
-
-
-Run the below command to create the pod and fix the permissions
-
-```commandline
-kubectl create -f permissions_pod.yml
-```
-
-Once the permissions pod is up and running, execute the below commands within the container.
-
-```commandline
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
-# If needed
-kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
-```
-
-### Create OpenMetadata dependencies Values
-
-Override openmetadata dependencies airflow helm values to bind the nfs persistent volumes for DAGs and logs.
-
-```yaml
-# values-dependencies.yml
-airflow:
- airflow:
- extraVolumeMounts:
- - mountPath: /airflow-logs
- name: nfs-airflow-logs
- - mountPath: /airflow-dags/dags
- name: nfs-airflow-dags
- extraVolumes:
- - name: nfs-airflow-logs
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-logs
- - name: nfs-airflow-dags
- persistentVolumeClaim:
- claimName: openmetadata-dependencies-dags
- config:
- AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
- dags:
- path: /airflow-dags/dags
- persistence:
- enabled: false
- logs:
- path: /airflow-logs
- persistence:
- enabled: false
-```
-
-
-For more information on airflow helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
-
-When deploying openmeteadata dependencies helm chart, use the below command -
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
-```
-
-
-
-The above command uses configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
-You can modify any configuration and deploy by passing your own `values.yaml`
-
-```commandline
-helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values
-```
-
-Once the openmetadata dependencies helm chart deployed, you can then run the below command to install the openmetadata helm chart -
-
-```commandline
-helm install openmetadata open-metadata/openmetadata --values
-```
-
-Make sure to create CloudSQL and ElasticSearch credentials as Kubernetes Secrets mentioned [here](/v1.13.x-SNAPSHOT/quick-start/local-kubernetes-deployment#2.-create-kubernetes-secrets-required-for-helm-charts).
-
-Also, disable MySQL and ElasticSearch from OpenMetadata Dependencies Helm Charts as mentioned in the FAQs [here](#how-to-disable-mysql-and-elasticsearch-from-openmetadata-dependencies-helm-charts).
-
-## Troubleshooting
-
-### Pods are stuck in Pending State due to Persistent Volume Creation Failure
-
-If you came across `invalid access type while creating the pvc`, and the permission pod is stuck in "pending" state.
-
-The above error might have occurred due to the pvc volumes not setup or pvc volumes are not mounted properly.
-
-
-
-
-Please validate:
-- all the prerequisites mentioned in this [section](#prerequisites)
-- the configuration of `dags_pv_pvc.yml` file
-- `storageClassName` field in YAML file
-
-## FAQs
-
-
\ No newline at end of file
+
+For deployments using Apache Airflow as the orchestrator, see the [GKE Airflow Orchestrator](/v1.13.x-SNAPSHOT/deployment/kubernetes/gke/airflow) guide.
+
diff --git a/v1.13.x-SNAPSHOT/deployment/kubernetes/gke/airflow.mdx b/v1.13.x-SNAPSHOT/deployment/kubernetes/gke/airflow.mdx
new file mode 100644
index 00000000..e61421b4
--- /dev/null
+++ b/v1.13.x-SNAPSHOT/deployment/kubernetes/gke/airflow.mdx
@@ -0,0 +1,321 @@
+---
+title: GKE with Apache Airflow Orchestrator | Official Documentation
+description: Configure Apache Airflow as the ingestion orchestrator on Google Kubernetes Engine with NFS persistent volumes and ReadWriteMany access.
+sidebarTitle: Airflow Orchestrator
+collate: false
+---
+
+import Faqs from '/snippets/deployment/faqs.mdx'
+
+# GKE with Apache Airflow Orchestrator
+
+If you prefer to use Apache Airflow as the orchestrator (e.g., for existing Airflow investments or complex DAG requirements), follow the configuration below.
+
+
+Using Airflow requires additional infrastructure: persistent volumes with ReadWriteMany access, the openmetadata-dependencies Helm chart, and more complex configuration.
+
+
+## Persistent Volumes with ReadWriteMany Access Modes
+
+The OpenMetadata Helm chart depends on Airflow, and Airflow expects a persistent disk that supports ReadWriteMany (the volume can be mounted as read-write by many nodes).
+
+The workaround is to create an NFS server disk on Google Kubernetes Engine, use it as the persistent volume claim, and deploy OpenMetadata by following the steps below in order.
+
+### Create NFS Share
+
+#### Provision GCP Persistent Disk for Google Kubernetes Engine
+
+Run the command below to create a gcloud compute zonal disk. For more information on Google Cloud disk options, see the [Google Cloud documentation](https://cloud.google.com/compute/docs/disks).
+
+```commandline
+gcloud compute disks create --size=100GB --zone= nfs-disk
+```
+
+#### Deploy NFS Server in GKE
+
+```yaml
+# nfs-server-deployment.yml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: nfs-server
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ role: nfs-server
+ template:
+ metadata:
+ labels:
+ role: nfs-server
+ spec:
+ initContainers:
+ - name: init-airflow-directories
+ image: busybox
+ command: ['sh', '-c', 'mkdir -p /exports/airflow-dags /exports/airflow-logs']
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ containers:
+ - name: nfs-server
+ image: itsthenetwork/nfs-server-alpine
+ env:
+ - name: SHARED_DIRECTORY
+ value: /exports
+ ports:
+ - name: nfs
+ containerPort: 2049
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - mountPath: /exports
+ name: nfs-pvc
+ volumes:
+ - name: nfs-pvc
+ gcePersistentDisk:
+ pdName: nfs-disk
+ fsType: ext4
+---
+# nfs-cluster-ip-service.yml
+apiVersion: v1
+kind: Service
+metadata:
+ name: nfs-server
+spec:
+ ports:
+ - name: nfs
+ port: 2049
+ selector:
+ role: nfs-server
+```
+Run the commands below and ensure the NFS server pod is running.
+
+```commandline
+kubectl create -f nfs-server-deployment.yml
+kubectl create -f nfs-cluster-ip-service.yml
+```
+
+We create a ClusterIP Service for pods to access NFS within the cluster at a fixed IP/DNS.
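+
+To confirm the NFS server is up before moving on, you can run a quick sanity check using the `role: nfs-server` label and Service name defined above:
+
+```commandline
+kubectl get pods -l role=nfs-server
+kubectl get service nfs-server
+```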
+
+#### Provision NFS backed PV and PVC for Airflow DAGs and Airflow Logs
+
+Update the empty `server:` field with the NFS Service Cluster IP address in the code snippets below.
+You can get the cluster IP using the following command:
+
+```commandline
+kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}'
+```
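+
+As a convenience, you can capture the cluster IP in a shell variable and substitute it into the empty `server:` fields. This is only a sketch; the `sed` expression assumes the manifest file names used in this guide:
+
+```commandline
+NFS_CLUSTER_IP=$(kubectl get service nfs-server -o jsonpath='{.spec.clusterIP}')
+# Fill in the empty server: fields in both manifests (adjust file names if yours differ)
+sed -i "s|server:.*|server: ${NFS_CLUSTER_IP}|" dags_pv_pvc.yml logs_pv_pvc.yml
+```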
+
+#### Code Samples for PV and PVC for Airflow DAGs
+
+```yaml
+# dags_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-dags-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-dags"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ release: openmetadata-dependencies
+ name: openmetadata-dependencies-dags
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create the Persistent Volume and Persistent Volume Claim with the command below.
+
+```commandline
+kubectl create -f dags_pv_pvc.yml
+```
+
+#### Code Samples for PV and PVC for Airflow Logs
+
+```yaml
+# logs_pv_pvc.yml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: openmetadata-dependencies-logs-pv
+spec:
+ capacity:
+ storage: 10Gi
+ accessModes:
+ - ReadWriteMany
+ nfs:
+ server:
+ path: "/airflow-logs"
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ labels:
+ app: airflow
+ name: openmetadata-dependencies-logs
+ namespace: default
+spec:
+ accessModes:
+ - ReadWriteMany
+ resources:
+ requests:
+ storage: 10Gi
+ storageClassName: ""
+```
+
+Create the Persistent Volume and Persistent Volume Claim with the command below.
+
+```commandline
+kubectl create -f logs_pv_pvc.yml
+```
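+
+Before proceeding, it is worth confirming that both claims are bound to their NFS-backed volumes (a standard kubectl check; the names match the manifests above):
+
+```commandline
+kubectl get pvc openmetadata-dependencies-dags openmetadata-dependencies-logs
+# STATUS should be Bound for both claims
+```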
+
+### Change owner and permission manually on disks
+
+Since Airflow pods run as non-root users, they do not have write access to the NFS server volumes. To fix the permissions, spin up a pod with the persistent volumes attached and run it once.
+
+```yaml
+# permissions_pod.yml
+apiVersion: v1
+kind: Pod
+metadata:
+ creationTimestamp: null
+ labels:
+ run: my-permission-pod
+ name: my-permission-pod
+spec:
+ containers:
+ - image: nginx
+ name: my-permission-pod
+ volumeMounts:
+ - name: airflow-dags
+ mountPath: /airflow-dags
+ - name: airflow-logs
+ mountPath: /airflow-logs
+ volumes:
+ - name: airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ dnsPolicy: ClusterFirst
+ restartPolicy: Always
+```
+
+
+
+Airflow runs its pods with the Linux user name `airflow` and user ID `50000`.
+
+
+
+Run the command below to create the pod:
+
+```commandline
+kubectl create -f permissions_pod.yml
+```
+
+Once the permissions pod is up and running, run the commands below to fix ownership and permissions inside the container:
+
+```commandline
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chown -R 50000 /airflow-dags /airflow-logs
+# If needed
+kubectl exec --tty my-permission-pod --container my-permission-pod -- chmod -R a+rwx /airflow-dags
+```
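+
+Optionally, verify that the ownership change took effect and remove the helper pod, since it is only needed for this one-time fix:
+
+```commandline
+kubectl exec --tty my-permission-pod --container my-permission-pod -- ls -ld /airflow-dags /airflow-logs
+# Both directories should now be owned by UID 50000
+kubectl delete pod my-permission-pod
+```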
+
+### Create OpenMetadata dependencies Values
+
+Override the openmetadata-dependencies Airflow Helm values to bind the NFS persistent volumes for DAGs and logs.
+
+```yaml
+# values-dependencies.yaml
+airflow:
+ airflow:
+ extraVolumeMounts:
+ - mountPath: /airflow-logs
+ name: nfs-airflow-logs
+ - mountPath: /airflow-dags/dags
+ name: nfs-airflow-dags
+ extraVolumes:
+ - name: nfs-airflow-logs
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-logs
+ - name: nfs-airflow-dags
+ persistentVolumeClaim:
+ claimName: openmetadata-dependencies-dags
+ config:
+ AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/airflow-dags/dags"
+ dags:
+ path: /airflow-dags/dags
+ persistence:
+ enabled: false
+ logs:
+ path: /airflow-logs
+ persistence:
+ enabled: false
+```
+
+
+For more information on Airflow Helm chart values, please refer to [airflow-helm](https://artifacthub.io/packages/helm/airflow-helm/airflow/8.8.0).
+
+When deploying the openmetadata-dependencies Helm chart, use the command below:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values values-dependencies.yaml
+```
+
+
+
+The above command uses the configurations defined [here](https://raw.githubusercontent.com/open-metadata/openmetadata-helm-charts/main/charts/deps/values.yaml).
+You can modify any configuration and deploy it by passing your own `values.yaml`:
+
+```commandline
+helm install openmetadata-dependencies open-metadata/openmetadata-dependencies --values
+```
+
+Once the openmetadata-dependencies Helm chart is deployed, run the command below to install the OpenMetadata Helm chart:
+
+```commandline
+helm install openmetadata open-metadata/openmetadata --values
+```
+
+Make sure to create the CloudSQL and ElasticSearch credentials as Kubernetes Secrets, as described [here](/v1.13.x-SNAPSHOT/quick-start/local-kubernetes-deployment#2.-create-kubernetes-secrets-required-for-helm-charts).
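+
+As an illustration only (the secret and key names below are hypothetical and must match whatever your `values.yaml` references), the secrets can be created along these lines:
+
+```commandline
+# Illustrative names: align them with the secretRef entries in your values.yaml
+kubectl create secret generic cloudsql-secrets --from-literal=openmetadata-mysql-password=<your-cloudsql-password>
+kubectl create secret generic elasticsearch-secrets --from-literal=openmetadata-elasticsearch-password=<your-elasticsearch-password>
+```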
+
+Also, disable MySQL and ElasticSearch in the OpenMetadata Dependencies Helm chart, as described in the FAQs [here](#how-to-disable-mysql-and-elasticsearch-from-openmetadata-dependencies-helm-charts).
+
+## Troubleshooting
+
+### Pods are stuck in Pending State due to Persistent Volume Creation Failure
+
+If you come across an `invalid access type while creating the pvc` error and the permission pod is stuck in the "Pending" state, the PVC volumes are likely not set up or not mounted properly.
+
+
+
+
+Please validate:
+- all the prerequisites mentioned in the [GKE deployment guide](/v1.13.x-SNAPSHOT/deployment/kubernetes/gke)
+- the configuration of the `dags_pv_pvc.yml` file
+- the `storageClassName` field in the YAML files
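+
+If everything above looks correct, the following standard kubectl commands can help pinpoint why the volume or pod is still stuck (resource names match the manifests in this guide):
+
+```commandline
+kubectl get pv,pvc
+kubectl describe pvc openmetadata-dependencies-dags
+kubectl describe pod my-permission-pod
+```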
+
+## FAQs
+
+<Faqs />
+
diff --git a/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx b/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
index c9b1b7f2..c8a6aea4 100644
--- a/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
+++ b/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner.mdx
@@ -1,7 +1,7 @@
---
title: TestRunner - Running Table-Level Tests
description: Execute data quality tests against tables in OpenMetadata using the TestRunner API
-sidebarTitle: Test Runner
+sidebarTitle: Overview
---
# TestRunner - Running Table-Level Tests
@@ -20,11 +20,10 @@ The `TestRunner` class provides a fluent API for executing data quality tests ag
- [Integration with ETL Workflows](#integration-with-etl-workflows)
- [Error Handling](#error-handling)
- [Best Practices](#best-practices)
-- [Using External Secrets Managers](#using-external-secrets-managers)
- [Next Steps](#next-steps)
-⚠️ If you're using OpenMetadata Cloud to run OpenMetadata, please refer to the section about [External Secrets Managers](#using-external-secrets-managers)
+⚠️ If you're using OpenMetadata Cloud, please refer to [External Secrets Managers](/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets) before using the TestRunner API.
## Overview
@@ -408,181 +407,14 @@ except Exception as e:
5. **Combine table and column tests**: Ensure both structural and content quality
-## Using External Secrets Managers
-### Important Note
-
-If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
-
-This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
-
-### Why This is Required
-
-The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
-
-1. Retrieve the service connection configuration from OpenMetadata
-2. Decrypt the credentials stored in your secrets manager
-3. Establish a connection to the data source
-4. Execute the test cases
-
-Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
-
-### General Setup Steps
-
-1. **Contact your OpenMetadata/OpenMetadata administrator** to obtain:
- - The secrets manager type (AWS, Azure, GCP, etc.)
- - The secrets manager loader configuration
- - Required environment variables or configuration files
- - Any additional setup (IAM roles, service principals, etc.)
-
-2. **Install required dependencies** for your secrets manager provider
-
-3. **Configure environment variables** with access credentials
-
-4. **Initialize the SecretsManagerFactory** before using TestRunner
-
-5. **Configure the SDK** and run your tests
-
-### Example using AWS Secrets Manager
-
-**Required Dependencies:**
-```bash
-pip install "openmetadata-ingestion[aws]>=1.11.0.0"
-```
-
-**Example Configuration:**
-```python
-import os
-
-from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
-from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
-from metadata.sdk import configure
-from metadata.sdk.data_quality import TestRunner
-from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
-
-# Set AWS credentials and region
-os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
-os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
-os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
-
-# Initialize secrets manager (must be done before configure())
-SecretsManagerFactory(
- secrets_manager_provider=SecretsManagerProvider.managed_aws,
- secrets_manager_loader=SecretsManagerClientLoader.env,
-)
-
-# Configure OpenMetadata SDK
-configure(
- host="https://your-openmetadata-instance.com/api",
- jwt_token="your-jwt-token",
-)
-
-# Use TestRunner as normal
-runner = TestRunner.for_table("MySQL.production.database.my_table")
-results = runner.run()
-```
-
-### Configuration by Provider
-
-#### AWS and AWS Parameters Store
-
-**OpenMetadata's ingestion extras**: `aws` (e.g `pip install 'openmetadata-ingestion[aws]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.aws`
-- `SecretsManagerProvider.managed_aws`
-- `SecretsManagerProvider.aws_ssm`
-- `SecretsManagerProvider.managed_aws_ssm`
-
-**Environment variables:**
-- `AWS_ACCESS_KEY_ID`
-- `AWS_SECRET_ACCESS_KEY`
-- `AWS_DEFAULT_REGION`
-
-#### Azure Key Vault
-
-**OpenMetadata's ingestion extras**: `azure` (e.g `pip install 'openmetadata-ingestion[azure]'`)
-
-**SecretsManagerProvider: (one of)**
-- `SecretsManagerProvider.azure_kv`
-- `SecretsManagerProvider.managed_azure_kv`
-
-**Environment variables:**
-- `AZURE_CLIENT_ID`
-- `AZURE_CLIENT_SECRET`
-- `AZURE_TENANT_ID`
-- `AZURE_KEY_VAULT_NAME`
-
-#### Google Cloud Secret Manager
-
-**OpenMetadata's ingestion extras**: `gcp` (e.g `pip install 'openmetadata-ingestion[gcp]'`)
-
-**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
-
-**Environment variables:**
-- `GOOGLE_APPLICATION_CREDENTIALS`: path to the file with the credentials json file
-- `GCP_PROJECT_ID`
-
-### Troubleshooting
-
-#### Error: "Cannot decrypt service connection"
-
-**Cause**: Secrets manager not initialized or misconfigured
-
-**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
-
-#### Error: "Access Denied" or "Unauthorized"
-
-**Cause**: Insufficient permissions to access secrets
-
-**Solution**:
-- Verify IAM role/service principal has correct permissions
-- Check credentials are valid and not expired
-- Ensure correct region/vault name is specified
-
-#### Error: "Module not found" for secrets manager
-
-**Cause**: Missing dependencies for your secrets manager
-
-**Solution**: Install required extras:
-```bash
-# For AWS
-pip install "openmetadata-ingestion[aws]"
-
-# For Azure
-pip install "openmetadata-ingestion[azure]"
-
-# For GCP
-pip install "openmetadata-ingestion[gcp]"
-```
-
-#### Tests Fail with Connection Errors
-
-**Cause**: Credentials not properly decrypted or secrets manager misconfigured
-
-**Solution**:
-1. Verify secrets manager provider matches your OpenMetadata backend configuration
-2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
-3. Check network connectivity to secrets manager service
-4. Enable debug logging to see detailed error messages:
-
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
-### Contact Your Administrator
-
-If you're unsure about:
-- Which secrets manager your organization uses
-- Required environment variables or configuration
-- Access credentials or IAM roles
-- Permissions needed
-
-**Contact your OpenMetadata or OpenMetadata administrator** for the specific configuration required in your environment.
+
+If your organization uses an external secrets manager (AWS, Azure, GCP), see [External Secrets Managers](/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets) before using the TestRunner API.
+
## Next Steps
- Learn about [DataFrame Validation](/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/dataframe-validation) for validating transformations
- Review the [Test Definitions Reference](/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-definitions) for all available tests
- Explore [Advanced Usage](/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/advanced-usage) including YAML workflows
+
diff --git a/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx b/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
new file mode 100644
index 00000000..1cf58669
--- /dev/null
+++ b/v1.13.x-SNAPSHOT/how-to-guides/data-quality-observability/quality/data-quality-as-code/test-runner/external-secrets.mdx
@@ -0,0 +1,182 @@
+---
+title: External Secrets Managers | TestRunner
+description: Configure external secrets managers (AWS, Azure, GCP) for the TestRunner API when your OpenMetadata instance uses an external secrets manager.
+sidebarTitle: External Secrets
+---
+
+# Using External Secrets Managers
+
+## Important Note
+
+If your OpenMetadata instance uses **database-stored credentials** (the default configuration), you do not need to follow this guide. The SDK will automatically retrieve and decrypt credentials.
+
+This guide is only necessary when your organization uses an **external secrets manager** for credential storage.
+
+
+⚠️ If you're using OpenMetadata Cloud, configure your external secrets manager as described in this guide before using the TestRunner API.
+
+
+## Why This is Required
+
+The `TestRunner` API executes data quality tests directly from your Python code (e.g., within your ETL pipelines). To connect to your data sources, it needs to:
+
+1. Retrieve the service connection configuration from OpenMetadata
+2. Decrypt the credentials stored in your secrets manager
+3. Establish a connection to the data source
+4. Execute the test cases
+
+Without proper secrets manager configuration, the SDK cannot decrypt credentials and will fail to connect to your data sources.
+
+## General Setup Steps
+
+1. **Contact your OpenMetadata administrator** to obtain:
+ - The secrets manager type (AWS, Azure, GCP, etc.)
+ - The secrets manager loader configuration
+ - Required environment variables or configuration files
+ - Any additional setup (IAM roles, service principals, etc.)
+
+2. **Install required dependencies** for your secrets manager provider
+
+3. **Configure environment variables** with access credentials
+
+4. **Initialize the SecretsManagerFactory** before using TestRunner
+
+5. **Configure the SDK** and run your tests
+
+## Example using AWS Secrets Manager
+
+**Required Dependencies:**
+```bash
+pip install "openmetadata-ingestion[aws]>=1.11.0.0"
+```
+
+**Example Configuration:**
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.sdk import configure
+from metadata.sdk.data_quality import TestRunner
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Set AWS credentials and region
+os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
+os.environ["AWS_DEFAULT_REGION"] = "us-east-1" # Your AWS region
+
+# Initialize secrets manager (must be done before configure())
+SecretsManagerFactory(
+ secrets_manager_provider=SecretsManagerProvider.managed_aws,
+ secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+
+# Configure OpenMetadata SDK
+configure(
+ host="https://your-openmetadata-instance.com/api",
+ jwt_token="your-jwt-token",
+)
+
+# Use TestRunner as normal
+runner = TestRunner.for_table("MySQL.production.database.my_table")
+results = runner.run()
+```
+
+## Configuration by Provider
+
+### AWS and AWS Systems Manager Parameter Store
+
+**OpenMetadata's ingestion extras**: `aws` (e.g., `pip install 'openmetadata-ingestion[aws]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.aws`
+- `SecretsManagerProvider.managed_aws`
+- `SecretsManagerProvider.aws_ssm`
+- `SecretsManagerProvider.managed_aws_ssm`
+
+**Environment variables:**
+- `AWS_ACCESS_KEY_ID`
+- `AWS_SECRET_ACCESS_KEY`
+- `AWS_DEFAULT_REGION`
+
+### Azure Key Vault
+
+**OpenMetadata's ingestion extras**: `azure` (e.g., `pip install 'openmetadata-ingestion[azure]'`)
+
+**SecretsManagerProvider: (one of)**
+- `SecretsManagerProvider.azure_kv`
+- `SecretsManagerProvider.managed_azure_kv`
+
+**Environment variables:**
+- `AZURE_CLIENT_ID`
+- `AZURE_CLIENT_SECRET`
+- `AZURE_TENANT_ID`
+- `AZURE_KEY_VAULT_NAME`
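+
+A minimal initialization sketch for Azure Key Vault, following the same pattern as the AWS example above (the credential values and vault name are placeholders you must supply):
+
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Service principal credentials and Key Vault name (placeholders)
+os.environ["AZURE_CLIENT_ID"] = "your-client-id"
+os.environ["AZURE_CLIENT_SECRET"] = "your-client-secret"
+os.environ["AZURE_TENANT_ID"] = "your-tenant-id"
+os.environ["AZURE_KEY_VAULT_NAME"] = "your-key-vault-name"
+
+# Initialize the secrets manager (must be done before configure())
+SecretsManagerFactory(
+    secrets_manager_provider=SecretsManagerProvider.managed_azure_kv,
+    secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+```
+
+After this initialization, `configure()` and `TestRunner` are used exactly as in the AWS example above.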
+
+### Google Cloud Secret Manager
+
+**OpenMetadata's ingestion extras**: `gcp` (e.g., `pip install 'openmetadata-ingestion[gcp]'`)
+
+**SecretsManagerProvider:** `SecretsManagerProvider.gcp`
+
+**Environment variables:**
+- `GOOGLE_APPLICATION_CREDENTIALS`: path to the credentials JSON file
+- `GCP_PROJECT_ID`
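+
+A minimal initialization sketch for Google Cloud Secret Manager, again mirroring the AWS example (the key file path and project ID are placeholders):
+
+```python
+import os
+
+from metadata.generated.schema.security.secrets.secretsManagerClientLoader import SecretsManagerClientLoader
+from metadata.generated.schema.security.secrets.secretsManagerProvider import SecretsManagerProvider
+from metadata.utils.secrets.secrets_manager_factory import SecretsManagerFactory
+
+# Path to the service account key file and the project that hosts the secrets (placeholders)
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
+os.environ["GCP_PROJECT_ID"] = "your-gcp-project-id"
+
+# Initialize the secrets manager (must be done before configure())
+SecretsManagerFactory(
+    secrets_manager_provider=SecretsManagerProvider.gcp,
+    secrets_manager_loader=SecretsManagerClientLoader.env,
+)
+```
+
+As with the other providers, run `configure()` and your `TestRunner` calls after this initialization.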
+
+## Troubleshooting
+
+### Error: "Cannot decrypt service connection"
+
+**Cause**: Secrets manager not initialized or misconfigured
+
+**Solution**: Ensure `SecretsManagerFactory` is initialized **before** calling `configure()` or creating the `TestRunner`
+
+### Error: "Access Denied" or "Unauthorized"
+
+**Cause**: Insufficient permissions to access secrets
+
+**Solution**:
+- Verify IAM role/service principal has correct permissions
+- Check credentials are valid and not expired
+- Ensure correct region/vault name is specified
+
+### Error: "Module not found" for secrets manager
+
+**Cause**: Missing dependencies for your secrets manager
+
+**Solution**: Install required extras:
+```bash
+# For AWS
+pip install "openmetadata-ingestion[aws]"
+
+# For Azure
+pip install "openmetadata-ingestion[azure]"
+
+# For GCP
+pip install "openmetadata-ingestion[gcp]"
+```
+
+### Tests Fail with Connection Errors
+
+**Cause**: Credentials not properly decrypted or secrets manager misconfigured
+
+**Solution**:
+1. Verify secrets manager provider matches your OpenMetadata backend configuration
+2. Test credential access independently (e.g., using AWS CLI, Azure CLI, gcloud)
+3. Check network connectivity to secrets manager service
+4. Enable debug logging to see detailed error messages:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+## Contact Your Administrator
+
+If you're unsure about:
+- Which secrets manager your organization uses
+- Required environment variables or configuration
+- Access credentials or IAM roles
+- Permissions needed
+
+**Contact your OpenMetadata administrator** for the specific configuration required in your environment.