open-metadata · PRADDZY · Apr 19, 2026
diff --git a/.github/actions/openmetadata-impact-analysis/action.yml b/.github/actions/openmetadata-impact-analysis/action.yml
@@ -0,0 +1,124 @@
+name: OpenMetadata Impact Analysis
+description: Analyze dbt model changes with OpenMetadata lineage and publish report artifacts.
+
+# Composite action: creates the diff when the caller did not supply one, installs
+# the analyzer from this action's own repository, runs it, and exposes the report
+# paths plus the risk summary as outputs.
+inputs:
+  diff-path:
+    description: Path to a unified git diff file. Generated from base-ref when missing.
+    required: false
+    default: changes.diff
+  metadata-host:
+    description: OpenMetadata or Collate host URL.
+    required: false
+  metadata-token:
+    description: Bot JWT token with read access to metadata.
+    required: false
+  openai-api-key:
+    description: OpenAI API key for the LangChain analysis agent.
+    required: false
+  base-ref:
+    description: Base ref used to generate the diff when diff-path does not exist.
+    required: false
+    default: origin/main
+  paths:
+    description: Comma-separated path globs to analyze.
+    required: false
+    default: "**/models/**/*.sql,**/models/**/*.yml,**/models/**/*.yaml"
+  report-path:
+    description: Markdown report path.
+    required: false
+    default: impact_report.md
+  html-report-path:
+    description: Static HTML report artifact path.
+    required: false
+    default: impact_report.html
+  metadata-output-path:
+    description: JSON metadata output path.
+    required: false
+    default: impact_metadata.json
+  python-version:
+    description: Python version used to run the analyzer.
+    required: false
+    default: "3.11"
+
+outputs:
+  report-path:
+    description: Markdown report path.
+    value: ${{ steps.run.outputs.report-path }}
+  html-report-path:
+    description: Static HTML report artifact path.
+    value: ${{ steps.run.outputs.html-report-path }}
+  risk-level:
+    description: Deterministic risk level for the analyzed PR.
+    value: ${{ steps.run.outputs.risk-level }}
+  affected-count:
+    description: Count of affected OpenMetadata assets discovered in the report.
+    value: ${{ steps.run.outputs.affected-count }}
+
+runs:
+  using: composite
+  steps:
+    - name: Setup Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ inputs.python-version }}
+        cache: pip
+        # NOTE(review): this path is relative to the caller's workspace; confirm
+        # it resolves when the action is consumed from another repository.
+        cache-dependency-path: python/pyproject.toml
+
+    - name: Generate diff when needed
+      shell: bash
+      # Inputs are handed to the script through env instead of being interpolated
+      # into it, so an input value is never parsed as shell code.
+      env:
+        DIFF_PATH: ${{ inputs.diff-path }}
+        BASE_REF: ${{ inputs.base-ref }}
+      run: |
+        if [ ! -f "$DIFF_PATH" ]; then
+          git diff "${BASE_REF}...HEAD" > "$DIFF_PATH"
+        fi
+
+    - name: Install Python dependencies
+      shell: bash
+      run: |
+        # The SDK package ships in this action's repository, three levels above
+        # the action directory, which is not the caller's workspace on remote use.
+        action_root="$(cd "$GITHUB_ACTION_PATH/../../.." && pwd)"
+        python -m pip install --upgrade pip
+        python -m pip install -e "${action_root}/python/[langchain]" langchain langchain-openai
+
+    - name: Run impact analysis
+      id: run
+      shell: bash
+      env:
+        AI_SDK_HOST: ${{ inputs.metadata-host }}
+        AI_SDK_TOKEN: ${{ inputs.metadata-token }}
+        OPENAI_API_KEY: ${{ inputs.openai-api-key }}
+        DIFF_PATH: ${{ inputs.diff-path }}
+        REPORT_PATH: ${{ inputs.report-path }}
+        HTML_REPORT_PATH: ${{ inputs.html-report-path }}
+        METADATA_OUTPUT_PATH: ${{ inputs.metadata-output-path }}
+        ANALYSIS_PATHS: ${{ inputs.paths }}
+      run: |
+        action_root="$(cd "$GITHUB_ACTION_PATH/../../.." && pwd)"
+        python "${action_root}/cookbook/mcp-impact-analysis/batch_analyzer.py" "$DIFF_PATH" \
+          --output "$REPORT_PATH" \
+          --html-output "$HTML_REPORT_PATH" \
+          --metadata-output "$METADATA_OUTPUT_PATH" \
+          --paths "$ANALYSIS_PATHS"
+
+        # Publish the report paths and the risk summary read from the metadata JSON.
+        echo "report-path=$REPORT_PATH" >> "$GITHUB_OUTPUT"
+        echo "html-report-path=$HTML_REPORT_PATH" >> "$GITHUB_OUTPUT"
+        python - <<'PY' >> "$GITHUB_OUTPUT"
+        import json
+        import os
+        from pathlib import Path
+
+        metadata = json.loads(Path(os.environ["METADATA_OUTPUT_PATH"]).read_text())
+        print(f"risk-level={metadata['risk_level']}")
+        print(f"affected-count={metadata['affected_count']}")
+        PY
diff --git a/.github/workflows/impact-analysis.yml b/.github/workflows/impact-analysis.yml
@@ -1,10 +1,11 @@
 name: Data Impact Analysis
 
 on:
-  # Disabling trigger, keeping it for demos
-  # pull_request:
-  #   paths:
-  #     - 'cookbook/resources/demo-database/dbt/models/**'
+  pull_request:  # run only for PRs that touch the demo dbt model files below
+    paths:
+      - 'cookbook/resources/demo-database/dbt/models/**/*.sql'
+      - 'cookbook/resources/demo-database/dbt/models/**/*.yml'
+      - 'cookbook/resources/demo-database/dbt/models/**/*.yaml'
   workflow_dispatch:
 
 concurrency:
@@ -13,58 +14,84 @@ concurrency:
 
 permissions:
   contents: read
+  pull-requests: write
+  issues: write
 
 jobs:
   analyze:
     name: Impact Analysis
     runs-on: ubuntu-latest
-    environment: test
-    permissions:
-      contents: read
-      pull-requests: write
 
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
-      - name: Setup Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-          cache: 'pip'
-          cache-dependency-path: python/pyproject.toml
-
-      - name: Install dependencies
-        run: |
-          pip install -e python/[langchain]
-          pip install langchain langchain-openai
-
       - name: Generate diff
+        shell: bash
         run: |
-          git diff origin/main...HEAD > changes.diff
-          echo "--- Diff contents ---"
-          cat changes.diff
+          # PR runs diff against the PR base commit; manual runs diff against main.
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            git diff "${{ github.event.pull_request.base.sha }}...HEAD" > changes.diff
+          else
+            # Fetch main with full history: a --depth=1 fetch would turn the
+            # fetch-depth: 0 checkout shallow and hide the merge base that the
+            # three-dot diff needs.
+            git fetch origin main
+            git diff origin/main...HEAD > changes.diff
+          fi
 
-      - name: Run impact analysis
-        working-directory: cookbook/mcp-impact-analysis
-        env:
-          AI_SDK_HOST: ${{ secrets.AI_SDK_HOST }}
-          AI_SDK_TOKEN: ${{ secrets.AI_SDK_TOKEN }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        run: python batch_analyzer.py ../../changes.diff > ../../impact_report.md
+      - name: Run OpenMetadata impact analysis
+        id: impact
+        uses: ./.github/actions/openmetadata-impact-analysis
+        with:
+          diff-path: changes.diff
+          metadata-host: ${{ secrets.AI_SDK_HOST }}
+          metadata-token: ${{ secrets.AI_SDK_TOKEN }}
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          paths: "cookbook/resources/demo-database/dbt/models/**/*.sql,cookbook/resources/demo-database/dbt/models/**/*.yml,cookbook/resources/demo-database/dbt/models/**/*.yaml"
+
+      # Look up the comment left by a previous run so it is updated in place.
+      - name: Find existing PR comment
+        if: github.event_name == 'pull_request'
+        uses: peter-evans/find-comment@v3
+        id: find-comment
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          comment-author: github-actions[bot]
+          body-includes: openmetadata-impact-analysis
 
       - name: Post PR comment
         if: github.event_name == 'pull_request'
         uses: peter-evans/create-or-update-comment@v4
         with:
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
           issue-number: ${{ github.event.pull_request.number }}
-          body-path: impact_report.md
+          body-path: ${{ steps.impact.outputs.report-path }}
+          edit-mode: replace
 
-      - name: Write to step summary (manual trigger)
-        if: github.event_name == 'workflow_dispatch'
+      - name: Write to step summary
+        shell: bash
+        # Step outputs reach the script through env, not inline interpolation,
+        # so their contents are never parsed as shell code.
+        env:
+          RISK_LEVEL: ${{ steps.impact.outputs.risk-level }}
+          AFFECTED_COUNT: ${{ steps.impact.outputs.affected-count }}
+          REPORT_PATH: ${{ steps.impact.outputs.report-path }}
         run: |
-          echo "## Data Impact Analysis" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          cat impact_report.md >> $GITHUB_STEP_SUMMARY
+          echo "## Data Impact Analysis" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Risk: \`${RISK_LEVEL}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Affected assets: \`${AFFECTED_COUNT}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          cat "$REPORT_PATH" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload impact report artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: openmetadata-impact-analysis
+          path: |
+            ${{ steps.impact.outputs.report-path }}
+            ${{ steps.impact.outputs.html-report-path }}
+            impact_metadata.json
diff --git a/cookbook/mcp-impact-analysis/README.md b/cookbook/mcp-impact-analysis/README.md
@@ -248,29 +248,49 @@ Assets that could benefit from `loyalty_tier`:
 
 The [`batch_analyzer.py`](./batch_analyzer.py) script extends the interactive agent for automated use in pull requests. It:
 
-1. Reads a `git diff` file and extracts every changed `.sql` file under a `models/` directory.
-2. Spins up the same agent from `impact_analyzer.py`.
-3. Iterates over each changed model and asks the agent for its downstream impact.
-4. Prints a combined Markdown report suitable for posting as a PR comment.
+1. Reads a `git diff` file and extracts changed dbt `.sql`, `.yml`, and `.yaml` files under a `models/` directory.
+2. Uses the MCP-backed agent from `impact_analyzer.py` when credentials are available.
+3. Falls back to a deterministic review report when AI credentials are missing, so CI still produces useful output.
+4. Scores risk from affected assets, data-quality signals, PII/governance terms, and critical business asset mentions.
+5. Writes Markdown, optional static HTML, and optional JSON metadata for GitHub Action outputs.
 
 Run it locally:
 
 ```bash
 git diff origin/main...HEAD > changes.diff
-python batch_analyzer.py changes.diff
+python batch_analyzer.py changes.diff \
+  --output impact_report.md \
+  --html-output impact_report.html \
+  --metadata-output impact_metadata.json
 ```
 
 ### GitHub Actions Integration
 
-This repository includes a ready-to-use workflow at [`.github/workflows/impact-analysis.yml`](../../.github/workflows/impact-analysis.yml). It:
+This repository includes a reusable composite action at [`.github/actions/openmetadata-impact-analysis/action.yml`](../../.github/actions/openmetadata-impact-analysis/action.yml) and a ready-to-use workflow at [`.github/workflows/impact-analysis.yml`](../../.github/workflows/impact-analysis.yml). The workflow:
 
 1. **Triggers automatically** on PRs that modify dbt models under `cookbook/resources/demo-database/dbt/models/`.
 2. **Supports manual dispatch** via the Actions tab — useful for demos or ad-hoc runs on any branch.
 3. **Generates a diff** between the PR branch and `origin/main`.
-4. **Runs the batch analyzer** against the diff to produce a Markdown impact report.
-5. **Posts a PR comment** with the full report (or writes to the GitHub Step Summary for manual runs).
+4. **Runs the reusable action** against the diff to produce Markdown, HTML, and JSON outputs.
+5. **Posts or updates one PR comment** with the full report.
+6. **Uploads the Markdown, HTML, and JSON reports** as a workflow artifact for review and demos.
 
-The workflow uses an HTML comment marker (`<!-- impact-analysis-bot -->`) to find and update its own comment on subsequent pushes, so you only ever see one impact analysis comment per PR.
+The report embeds an HTML comment marker (`<!-- openmetadata-impact-analysis -->`) that the workflow uses to find and update its own comment on subsequent pushes, so you only ever see one impact analysis comment per PR.
+
+#### Reusable Action
+
+Add this action to another repository after checking out code and generating a diff:
+
+```yaml
+- name: Run OpenMetadata impact analysis
+  uses: open-metadata/ai-sdk/.github/actions/openmetadata-impact-analysis@main
+  with:
+    diff-path: changes.diff
+    metadata-host: ${{ secrets.AI_SDK_HOST }}
+    metadata-token: ${{ secrets.AI_SDK_TOKEN }}
+    openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+    paths: "**/models/**/*.sql,**/models/**/*.yml,**/models/**/*.yaml"
+```
 
 #### Required Secrets
 
@@ -282,6 +302,8 @@ Configure these in your repository settings under **Settings > Secrets and varia
 | `AI_SDK_TOKEN` | A bot JWT token with read access to metadata |
 | `OPENAI_API_KEY` | OpenAI API key for the LLM |
 
+If these secrets are not present, the action still completes and emits a fallback report that tells reviewers which changed models need manual OpenMetadata review.
+
 #### Demo Walkthrough
 
 To see the workflow in action: