diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index bd587969a..9296619ef 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -51,7 +51,7 @@
       "name": "snowflake",
       "source": "./plugins/snowflake",
       "description": "Snowflake data analysis commands for engineering metrics and reports",
-      "version": "0.4.0"
+      "version": "0.5.0"
     },
     {
       "name": "sosreport",
diff --git a/docs/data.json b/docs/data.json
index fb62d802e..624c88dd5 100644
--- a/docs/data.json
+++ b/docs/data.json
@@ -797,7 +797,7 @@
           "name": "Setup Snowflake"
         }
       ],
-      "version": "0.4.0"
+      "version": "0.5.0"
     },
     {
       "commands": [
diff --git a/plugins/snowflake/.claude-plugin/plugin.json b/plugins/snowflake/.claude-plugin/plugin.json
index 6c9610e1f..9648a3366 100644
--- a/plugins/snowflake/.claude-plugin/plugin.json
+++ b/plugins/snowflake/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "snowflake",
   "description": "Snowflake data analysis commands for engineering metrics and reports",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "author": {
     "name": "github.com/openshift-eng"
   }
diff --git a/plugins/snowflake/README.md b/plugins/snowflake/README.md
index 97da3dd7f..cbebe5299 100644
--- a/plugins/snowflake/README.md
+++ b/plugins/snowflake/README.md
@@ -4,7 +4,7 @@ Snowflake data analysis commands for engineering metrics and reports. Uses the [
 
 ## Prerequisites
 
-1. **Snowflake access** -- You need an account on your organization's Snowflake instance with the appropriate role (e.g., `JIRA_CLOUDMARTS_GROUP` for Jira data). See [the data platform documentation](https://dataverse.pages.redhat.com/data-docs/data-users/) for access provisioning.
+1. **Snowflake access** -- You need an account on your organization's Snowflake instance with the `PUBLIC` role. See [the data platform documentation](https://dataverse.pages.redhat.com/data-docs/data-users/) for access provisioning.
 
 2. **Python 3** -- Required for report generation. Most systems have this pre-installed.
 
diff --git a/plugins/snowflake/commands/activity-type-report.md b/plugins/snowflake/commands/activity-type-report.md
index b2c797ee3..51f963e72 100644
--- a/plugins/snowflake/commands/activity-type-report.md
+++ b/plugins/snowflake/commands/activity-type-report.md
@@ -13,6 +13,8 @@ snowflake:activity-type-report
 /snowflake:activity-type-report <projects> [months] --todo
 /snowflake:activity-type-report <projects> [months] --all
 /snowflake:activity-type-report <projects> [months] --uncategorized
+/snowflake:activity-type-report <projects> [months] --uncategorized --todo
+/snowflake:activity-type-report <projects> [months] --uncategorized --all
 /snowflake:activity-type-report <projects> [months] --uncategorized --sample [N]
 ```
 
@@ -35,7 +37,7 @@ Activity type categories:
 
 ### Phase 1: Verify Snowflake Connection
 
-Read and follow the `setup-snowflake` skill. This checks for the Snowflake MCP server, guides the user through setup if needed, and sets the session context (`JIRA_CLOUDMARTS_GROUP` role, `JIRA_DB.CLOUDRHAI_MARTS` schema).
+Read and follow the `setup-snowflake` skill. This checks for the Snowflake MCP server, guides the user through setup if needed, and sets the session context (`PUBLIC` role, `JIRA_DB.CLOUDRHAI_MARTS` schema).
 
 If setup fails, abort with the guidance message from the skill. Do not proceed without a working Snowflake connection.
 
@@ -79,15 +81,28 @@ And `DPTP --uncategorized` means projects=DPTP, months=6 (default), closed issue
 Core query pattern (adapt based on available columns/views):
 
 ```sql
+WITH bot_issues AS (
+    SELECT DISTINCT ISSUE
+    FROM JIRA_LABEL_RHAI
+    WHERE LABEL IN (
+        'auto-created', 'bot-created', 'ai-generated', 'ai-generated-jira',
+        'cloud-automated-jira', 'on-call-bot', 'automated', 'team:automatic_rule',
+        'bot-duplicate',
+        'art:image-build-failure', 'art:reconciliation',
+        'acs-generated', 'triaged-test-automation'
+    )
+)
 SELECT
     ji.ISSUE_KEY AS ISSUEKEY,
     ji.PROJECT AS PROJECT_KEY,
     ji.SUMMARY,
     SUBSTR(ji.DESCRIPTION, 1, 2000) AS DESCRIPTION_EXCERPT,
     ji.CREATED,
+    CASE WHEN bi.ISSUE IS NOT NULL THEN TRUE ELSE FALSE END AS IS_BOT,
     -- join for issue type name: jit.PNAME AS ISSUE_TYPE
     -- join for status name: js.PNAME AS STATUS
 FROM JIRA_ISSUE_NON_PII ji
+LEFT JOIN bot_issues bi ON bi.ISSUE = ji.ID
 LEFT JOIN JIRA_ISSUETYPE_RHAI jit ON jit.ID = ji.ISSUETYPE
 LEFT JOIN JIRA_ISSUESTATUS_RHAI js ON js.ID = ji.ISSUESTATUS_ID
 -- If --uncategorized: LEFT JOIN JIRA_CUSTOMFIELDVALUE_NON_PII cfv
@@ -107,13 +122,23 @@ WHERE ji.PROJECT IN ('DPTP', 'TRT', ...)
 ORDER BY ji.CREATED DESC
 ```
 
-If `JIRA_NODEASSOCIATION_RHAI` and `JIRA_COMPONENT_RHAI` views exist, also fetch components:
+The `bot_issues` CTE identifies issues filed by automation bots via labels in `JIRA_LABEL_RHAI`. These labels were verified across 48 HP projects — they reliably distinguish bot-filed tickets (e.g., ART image-build-failure, ACM auto-created CVEs) from human engineering work. Labels describing automation *work* by humans (e.g., `automation`, `qe-automation`, `auto-closed`) are intentionally excluded.
+
+If `JIRA_NODEASSOCIATION_RHAI` and `JIRA_COMPONENT_RHAI` views exist, also fetch components (reuse the same `bot_issues` CTE from the query above — identical label list):
 
 ```sql
+WITH bot_issues AS (
+    -- Same CTE as main query above — keep label list in sync
+    SELECT DISTINCT ISSUE
+    FROM JIRA_LABEL_RHAI
+    WHERE LABEL IN (<<same 13 labels as main query above>>)
+)
 SELECT
     ji.ISSUE_KEY AS ISSUEKEY,
-    LISTAGG(c.CNAME, ', ') WITHIN GROUP (ORDER BY c.CNAME) AS COMPONENTS
+    LISTAGG(c.CNAME, ', ') WITHIN GROUP (ORDER BY c.CNAME) AS COMPONENTS,
+    MAX(CASE WHEN bi.ISSUE IS NOT NULL THEN TRUE ELSE FALSE END) AS IS_BOT
 FROM JIRA_ISSUE_NON_PII ji
+LEFT JOIN bot_issues bi ON bi.ISSUE = ji.ID
 LEFT JOIN JIRA_NODEASSOCIATION_RHAI na
     ON na.SOURCE_NODE_ID = ji.ID AND na.ASSOCIATION_TYPE = 'IssueComponent'
 LEFT JOIN JIRA_COMPONENT_RHAI c ON c.ID = na.SINK_NODE_ID
@@ -177,7 +202,7 @@ All subsequent phases write to `$RUN_DIR/`.
 
 **Cache check**: If `$RUN_DIR/classified_issues.json` already exists (full mode) or `$RUN_DIR/estimates.json` already exists (sample mode), skip classification entirely and go directly to Phase 5. Tell the user: "Found existing classification in `$RUN_DIR/` — skipping Vertex AI API call to save tokens. Delete the directory to force re-classification."
 
-Otherwise, write the fetched issues to `$RUN_DIR/issues.json` as a JSON array. Each object should include: `ISSUEKEY`, `PROJECT_KEY`, `SUMMARY`, `DESCRIPTION_EXCERPT`, `CREATED`, `ISSUE_TYPE`, `STATUS`, and `COMPONENTS` (if available).
+Otherwise, write the fetched issues to `$RUN_DIR/issues.json` as a JSON array. Each object should include: `ISSUEKEY`, `PROJECT_KEY`, `SUMMARY`, `DESCRIPTION_EXCERPT`, `CREATED`, `ISSUE_TYPE`, `STATUS`, `COMPONENTS` (if available), and `IS_BOT`.
 
 Find the scripts directory:
 ```bash
@@ -203,7 +228,7 @@ python3 "$SCRIPT_DIR/sample_and_estimate.py" \
   --draw-sample $RUN_DIR/sample_to_classify.json \
   --sample-size ${N:-0}
 ```
-(0 = auto-recommend based on ±2.5% target precision, typically ~400 issues)
+(0 = auto-recommend based on ±2.5% target precision, typically ~400 issues. The sample is stratified by (project, is_bot) so that both human and bot populations are represented.)
 
 **Step 2: Classify only the sample**
 ```bash
@@ -281,10 +306,37 @@ Include the status filter in the summary header. When `--uncategorized` is activ
 
 #### Full mode summary:
 
+When bot issues are detected (any issue has `IS_BOT=true`), show separate human and bot distributions. The human distribution is the primary output — it shows what engineers are actually working on. The bot distribution is secondary context.
+
 ```
 Activity Type Report: $RUN_DIR/activity-type-report.html
 
-54,478 issues across 52 projects (2025-10-02 to 2026-04-07)
+3,114 closed issues across 1 project (2026-01-22 to 2026-04-22)
+  Human: 38 (1.2%)  |  Automated/Bot: 3,076 (98.8%)
+
+Human Work — Activity Type Distribution:
+  Product / Portfolio Work             15 (39.5%)
+  Quality / Stability / Reliability     8 (21.1%)
+  Future Sustainability                 6 (15.8%)
+  Incidents & Support                   4 (10.5%)
+  Security & Compliance                 3  (7.9%)
+  Associate Wellness & Development      1  (2.6%)
+  Uncategorized                         1  (2.6%)
+
+Automated/Bot Work — Activity Type Distribution:
+  Quality / Stability / Reliability  3,050 (99.2%)
+  Product / Portfolio Work              15  (0.5%)
+  Uncategorized                         11  (0.4%)
+
+Classification cost: 86,313 input + 13,008 output = 99,321 tokens, $0.45
+```
+
+When zero bot issues are detected, omit the human/bot split and show a single combined distribution:
+
+```
+Activity Type Report: $RUN_DIR/activity-type-report.html
+
+247 closed issues across 1 project (2025-10-02 to 2026-04-07)
 
 Activity Type Distribution:
   Quality / Stability / Reliability    98 (39.7%)
@@ -300,7 +352,32 @@ Classification cost: 86,313 input + 13,008 output = 99,321 tokens, $0.45
 
 #### Sample mode summary:
 
-Include credible intervals and sample metadata:
+Include credible intervals and sample metadata. When bot issues are detected, show separate human and bot distributions with their own credible intervals.
+
+```
+Activity Type Report (Sampled Estimate): $RUN_DIR/activity-type-report.html
+
+4,338 issues across 1 project (2025-10-02 to 2026-04-07)
+  Human: 1,237 (28.5%)  |  Automated/Bot: 3,101 (71.5%)
+Sample: 369 classified (8.5%) — 25 API calls, $0.45
+
+Human Work — Activity Type Distribution (95% Credible Intervals):
+  Product / Portfolio Work           32.1%  [25.4% — 39.2%]
+  Quality / Stability / Reliability  22.8%  [17.0% — 29.3%]
+  Future Sustainability              16.5%  [11.4% — 22.4%]
+  Incidents & Support                12.3%  [ 7.9% — 17.5%]
+  Security & Compliance               8.7%  [ 5.1% — 13.3%]
+  Associate Wellness & Development    4.2%  [ 1.8% —  7.8%]
+  Uncategorized                       3.4%  [ 1.3% —  6.6%]
+
+Automated/Bot Work — Activity Type Distribution (95% Credible Intervals):
+  Quality / Stability / Reliability  96.2%  [94.1% — 97.8%]
+  Product / Portfolio Work            1.5%  [ 0.5% —  3.1%]
+  Uncategorized                       1.3%  [ 0.4% —  2.8%]
+  ...
+```
+
+When zero bot issues are detected, omit the human/bot split and show a single combined distribution:
 
 ```
 Activity Type Report (Sampled Estimate): $RUN_DIR/activity-type-report.html
@@ -318,7 +395,7 @@ Activity Type Distribution (95% Credible Intervals):
   Incidents & Support                 3.5%  [ 1.9% —  5.5%]
 ```
 
-Read the estimates from `$RUN_DIR/estimates.json` (field: `overall.estimates[]`, each with `category`, `posterior_mean`, `ci_low`, `ci_high`) and the usage from `$RUN_DIR/classified_sample_usage.txt` (or `classified_issues_usage.txt` in full mode).
+Read the estimates from `$RUN_DIR/estimates.json`. For the overall distribution, use `overall.estimates[]` (each with `category`, `posterior_mean`, `ci_low`, `ci_high`). When `human` and `bot` keys are present in the JSON, use `human.estimates[]` and `bot.estimates[]` for the separate distributions. Read usage from `$RUN_DIR/classified_sample_usage.txt` (or `classified_issues_usage.txt` in full mode).
 
 After the summary, tell the user the HTML report is available at the path shown and can be opened directly in a browser from their host filesystem.
 
@@ -340,7 +417,7 @@ After the summary, tell the user the HTML report is available at the path shown
   - N = sample size (default: auto-recommended for ±2.5% precision, typically ~400)
   - The report shows posterior means with 95% credible intervals instead of exact counts
   - Dramatically reduces API cost and time for large datasets (e.g., 27 API calls vs. 1,000+)
-  - Uses stratified sampling by project to ensure all projects are represented
+  - Uses stratified sampling by (project, is_bot) to ensure all projects and both human/bot populations are represented
 
 - **--todo** (optional)
   - Analyze only open/backlog issues (non-closed statuses: New, In Progress, To Do, Refinement, etc.)
@@ -366,8 +443,9 @@ Each run produces a directory under `.work/snowflake/reports/` containing the ra
 
 The report includes:
 - Sankey diagram: Project to Activity Type flows
-- Summary statistics
-- Searchable, paginated detail table with direct Jira links per issue
+- Human/All/Bot toggle (when bot issues are detected) to view distributions separately
+- Summary statistics with human/bot counts
+- Searchable, paginated detail table with direct Jira links per issue, plus a Source column (Human/Bot) when bot issues are detected
 - CSV export capability
 
 ## Examples
@@ -417,7 +495,17 @@ The report includes:
    /snowflake:activity-type-report DPTP --uncategorized
    ```
 
-10. **Uncategorized with sampling:**
+10. **Uncategorized open/backlog issues:**
+    ```bash
+    /snowflake:activity-type-report DPTP,TRT 6 --uncategorized --todo
+    ```
+
+11. **Uncategorized across all statuses:**
+    ```bash
+    /snowflake:activity-type-report DPTP,TRT 6 --uncategorized --all
+    ```
+
+12. **Uncategorized with sampling:**
     ```bash
     /snowflake:activity-type-report DPTP,TRT,ART 6 --uncategorized --sample
     ```
@@ -436,5 +524,6 @@ The report includes:
 - **Self-contained output**: The HTML report works offline after generation -- no server needed.
 - **Cached classifications**: Re-running the same projects and date range skips the Vertex AI API call and reuses the existing `classified_issues.json` (or `estimates.json` in sample mode). Delete the run directory to force re-classification.
 - **Completed work by default**: By default, only closed issues (ISSUESTATUS_ID=6) with work-completed resolutions (RESOLUTION IN (10000, 10041) i.e. Done/Done-Errata, or NULL) are analyzed — this excludes no-work closures like Duplicate, Won't Do, Obsolete, Not a Bug, Can't Do, Cannot Reproduce, and MirrorOrphan (~25% of closed issues globally). Use `--todo` for open/backlog work, or `--all` for everything.
-- **Sampling mode**: For large datasets (thousands of issues), `--sample` uses Bayesian inference to estimate the activity type distribution from a small classified sample. Uses a Dirichlet-Multinomial conjugate model with uninformative priors — implemented entirely with Python stdlib (`random.gammavariate`). The report clearly labels results as estimates and shows credible intervals.
+- **Bot detection**: Issues filed by automation bots are identified via labels in `JIRA_LABEL_RHAI` (e.g., `auto-created`, `art:image-build-failure`, `ai-generated-jira`). The SQL CTE uses 13 verified bot labels covering general bot patterns and project-specific automation (ART, ACM, OCM, SREP, etc.). When bot issues are detected, the report shows a Human/All/Bot toggle and separate distributions. Labels describing automation *work* by humans (e.g., `automation`, `qe-automation`, `auto-closed`) are intentionally excluded. Projects with no bot issues show the standard single-view report.
+- **Sampling mode**: For large datasets (thousands of issues), `--sample` uses Bayesian inference to estimate the activity type distribution from a small classified sample. Uses a Dirichlet-Multinomial conjugate model with uninformative priors — implemented entirely with Python stdlib (`random.gammavariate`). Stratifies by (project, is_bot) to ensure both human and bot populations are represented. The report clearly labels results as estimates and shows credible intervals, with separate human/bot estimates when applicable.
 - **Uncategorized filter**: The `--uncategorized` flag uses `customfield_10464` (Activity Type) from the `JIRA_CUSTOMFIELDVALUE_NON_PII` view. **This custom field ID is specific to Red Hat JIRA instances.** The typical workflow is: run with `--uncategorized` to find and classify issues missing their Activity Type, review the report, then use `/jira:categorize-activity-type` to apply the classifications back to Jira.
diff --git a/plugins/snowflake/scripts/classify_issues.py b/plugins/snowflake/scripts/classify_issues.py
index 39451f683..32321174e 100644
--- a/plugins/snowflake/scripts/classify_issues.py
+++ b/plugins/snowflake/scripts/classify_issues.py
@@ -61,6 +61,14 @@
 VALID_CATEGORIES = set(ACTIVITY_TYPE_DEFINITIONS.keys())
 
 
+def _get_is_bot(issue):
+    """Extract bot flag from an issue, handling both Snowflake and processed formats."""
+    val = issue.get("IS_BOT", issue.get("is_bot", False))  # prefer the "IS_BOT" key; fall back to "is_bot", then False
+    if isinstance(val, str):
+        return val.lower() in ("true", "1", "yes")  # case-insensitive match; any other string yields False
+    return bool(val)  # non-string values use plain truthiness (None -> False)
+
+
 def build_prompt(batch):
     """Build the classification prompt for a batch of issues."""
     defs_text = "\n\n".join(
@@ -283,6 +291,7 @@ def main():
             "status": issue.get("STATUS", issue.get("status", "")),
             "components": issue.get("COMPONENTS", issue.get("components", "")),
             "created": issue.get("CREATED", issue.get("created", "")),
+            "is_bot": _get_is_bot(issue),
         })
 
     # Write output
@@ -303,6 +312,14 @@ def main():
         pct = count / len(output) * 100
         print(f"  {cat:<45s} {count:>4d} ({pct:.1f}%)")
 
+    # Print bot/human split
+    bot_count = sum(1 for item in output if item.get("is_bot"))
+    human_count = len(output) - bot_count
+    if bot_count > 0:
+        print(f"\nBot/Human Split:")
+        print(f"  Human:     {human_count:>6,} ({human_count/len(output)*100:.1f}%)")
+        print(f"  Automated: {bot_count:>6,} ({bot_count/len(output)*100:.1f}%)")
+
     # Print cost summary
     total_tokens = total_input_tokens + total_output_tokens
     print(f"\nAPI Usage:")
diff --git a/plugins/snowflake/scripts/generate_sankey.py b/plugins/snowflake/scripts/generate_sankey.py
index 7abb57ed3..a32425222 100644
--- a/plugins/snowflake/scripts/generate_sankey.py
+++ b/plugins/snowflake/scripts/generate_sankey.py
@@ -41,8 +41,19 @@
 }
 
 
-def generate_d3_sankey(data):
-    """Generate a pure-JS D3 sankey when Plotly is not installed."""
+def _get_is_bot(issue):
+    """Extract bot flag from an issue."""
+    val = issue.get("IS_BOT", issue.get("is_bot", False))  # prefer the "IS_BOT" key; fall back to "is_bot", then False
+    if isinstance(val, str):
+        return val.lower() in ("true", "1", "yes")  # case-insensitive match; any other string yields False
+    return bool(val)  # non-string values use plain truthiness (None -> False)
+
+
+def generate_d3_sankey(data, container_id="d3-sankey"):
+    """Generate a pure-JS D3 sankey diagram."""
+    if not data:
+        return f'<div id="{container_id}" style="padding:2rem;text-align:center;color:var(--text-muted);">No issues in this view</div>'
+
     flow_counts = Counter()
     for issue in data:
         flow_counts[(issue["project_key"], issue["activity_type"])] += 1
@@ -72,14 +83,18 @@ def generate_d3_sankey(data):
     )
 
     return f"""
-    <div id="d3-sankey" style="width:100%;min-height:500px;"></div>
-    <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/d3-sankey@0.12/dist/d3-sankey.min.js"></script>
+    <div id="{container_id}" style="width:100%;min-height:500px;"></div>
     <script>
     (function() {{
-      const width = document.getElementById('d3-sankey').clientWidth;
+      var el = document.getElementById('{container_id}');
+      var parent = el.parentElement;
+      if ((parent && parent.style.display === 'none') || el.style.display === 'none') {{
+        el.setAttribute('data-pending', 'true');
+        return;
+      }}
+      const width = el.clientWidth || 800;
       const height = Math.max(500, {len(activity_types)} * 60 + 100);
-      const svg = d3.select('#d3-sankey').append('svg')
+      const svg = d3.select('#{container_id}').append('svg')
         .attr('width', width).attr('height', height);
 
       const sankey = d3.sankey()
@@ -124,11 +139,12 @@ def generate_d3_sankey(data):
 
 
 
-def generate_summary_stats(data, estimates=None):
+def generate_summary_stats(data, estimates=None, estimates_key="overall"):
     """Generate summary statistics HTML.
 
     If estimates is provided (from sample_and_estimate.py), shows Bayesian
-    credible intervals instead of raw counts.
+    credible intervals instead of raw counts. estimates_key selects which
+    sub-estimate to use ("overall", "human", or "bot").
     """
     total = len(data)
     by_type = Counter(issue["activity_type"] for issue in data)
@@ -137,9 +153,11 @@ def generate_summary_stats(data, estimates=None):
     rows = ""
     if estimates:
         # Sampling mode: show posterior estimates with credible intervals
-        overall = estimates.get("overall", estimates)
+        est_section = estimates.get(estimates_key, estimates.get("overall", estimates))
+        if est_section is None:
+            est_section = estimates.get("overall", estimates)
         ci_pct = int(estimates.get("confidence", 0.95) * 100)
-        for est in overall.get("estimates", []):
+        for est in est_section.get("estimates", []):
             cat = est["category"]
             color = ACTIVITY_COLORS.get(cat, "#9E9E9E")
             mean_pct = est["posterior_mean"] * 100
@@ -326,8 +344,22 @@ def generate_html(data, title, projects_str, months, usage_info=None,
                    the report shows Bayesian credible intervals and marks itself
                    as a sampled estimate.
     """
-    sankey_html = generate_d3_sankey(data)
-    summary_html = generate_summary_stats(data, estimates=estimates)
+    # Split data into human and bot populations
+    human_data = [d for d in data if not _get_is_bot(d)]
+    bot_data = [d for d in data if _get_is_bot(d)]
+    has_bots = len(bot_data) > 0
+
+    # Generate sankeys for each view
+    if has_bots:
+        sankey_human = generate_d3_sankey(human_data, "d3-sankey-human")
+        sankey_bot = generate_d3_sankey(bot_data, "d3-sankey-bot")
+        sankey_all = generate_d3_sankey(data, "d3-sankey-all")
+        summary_human = generate_summary_stats(human_data, estimates=estimates, estimates_key="human")
+        summary_bot = generate_summary_stats(bot_data, estimates=estimates, estimates_key="bot")
+        summary_all = generate_summary_stats(data, estimates=estimates, estimates_key="overall")
+    else:
+        sankey_all = generate_d3_sankey(data, "d3-sankey-all")
+        summary_all = generate_summary_stats(data, estimates=estimates, estimates_key="overall")
 
     table_data = []
     for issue in data:
@@ -340,6 +372,7 @@ def generate_html(data, title, projects_str, months, usage_info=None,
             "status": issue.get("status", ""),
             "components": issue.get("components", ""),
             "created": issue.get("created", ""),
+            "is_bot": _get_is_bot(issue),
             "jira_url": f"{JIRA_BASE_URL}/browse/{issue.get('issue_key', '')}",
         })
 
@@ -531,6 +564,34 @@ def generate_html(data, title, projects_str, months, usage_info=None,
     padding-top: 1rem;
     border-top: 1px solid var(--border);
   }}
+  .view-toggle {{
+    display: flex;
+    gap: 0;
+    margin-bottom: 1.5rem;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    overflow: hidden;
+    width: fit-content;
+  }}
+  .toggle-btn {{
+    background: var(--bg);
+    border: none;
+    border-right: 1px solid var(--border);
+    color: var(--text-muted);
+    padding: 0.5rem 1.2rem;
+    font-size: 0.9rem;
+    cursor: pointer;
+    transition: background 0.15s, color 0.15s;
+  }}
+  .toggle-btn:last-child {{ border-right: none; }}
+  .toggle-btn.active {{
+    background: var(--surface);
+    color: var(--accent);
+    font-weight: 600;
+  }}
+  .toggle-btn:hover {{ color: var(--text); }}
+  .source-human {{ color: #7BC8A4; font-weight: 500; }}
+  .source-bot {{ color: #F5C542; font-weight: 500; }}
 </style>
 </head>
 <body>
@@ -541,9 +602,17 @@ def generate_html(data, title, projects_str, months, usage_info=None,
     {f' &middot; <span style="color:#F5C542;">&#9888; Sampled estimate ({estimates["sample_size"]} of {estimates["total_population"]} issues, {estimates["sample_fraction"]*100:.1f}%)</span>' if estimates else ""}
   </div>
 
+  {f'''<div class="view-toggle">
+    <button class="toggle-btn active" data-view="human" onclick="switchView('human')">Human Only ({len(human_data)})</button>
+    <button class="toggle-btn" data-view="all" onclick="switchView('all')">All ({len(data)})</button>
+    <button class="toggle-btn" data-view="bot" onclick="switchView('bot')">Bot Only ({len(bot_data)})</button>
+  </div>''' if has_bots else ''}
+
   <div class="section">
     <h2>Summary</h2>
-    {summary_html}
+    {f'''<div id="summary-human">{summary_human}</div>
+    <div id="summary-all" style="display:none">{summary_all}</div>
+    <div id="summary-bot" style="display:none">{summary_bot}</div>''' if has_bots else f'''<div id="summary-all">{summary_all}</div>'''}
   </div>
 
   {f"""<div class="section">
@@ -554,9 +623,14 @@ def generate_html(data, title, projects_str, months, usage_info=None,
 
   {generate_ci_chart(estimates) if estimates else ""}
 
+  <script src="https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js"></script>
+  <script src="https://cdn.jsdelivr.net/npm/d3-sankey@0.12/dist/d3-sankey.min.js"></script>
+
   <div class="section">
     <h2>Project &rarr; Activity Type</h2>
-    {sankey_html}
+    {f'''<div id="sankey-human">{sankey_human}</div>
+    <div id="sankey-all" style="display:none">{sankey_all}</div>
+    <div id="sankey-bot" style="display:none">{sankey_bot}</div>''' if has_bots else f'''<div id="sankey-all">{sankey_all}</div>'''}
   </div>
 
   <div class="section">
@@ -593,13 +667,18 @@ def generate_html(data, title, projects_str, months, usage_info=None,
         "var JQL_URL_KEY_LIMIT = 100;\n"
         "var ACTIVITY_COLORS = " + json.dumps(ACTIVITY_COLORS) + ";\n"
         "var ACTIVITY_TYPES = " + json.dumps(sorted(ACTIVITY_COLORS.keys())) + ";\n"
+        "var HAS_BOTS = " + json.dumps(has_bots) + ";\n"
     )
     app_js += r"""
 var COLUMNS = [
   {key: "issue_key", label: "Issue Key", width: "120px"},
   {key: "project_key", label: "Project", width: "80px"},
   {key: "activity_type", label: "Activity Type", width: "200px"},
-  {key: "summary", label: "Summary", width: ""},
+  {key: "summary", label: "Summary", width: ""}"""
+    if has_bots:
+        app_js += r""",
+  {key: "is_bot", label: "Source", width: "80px"}"""
+    app_js += r""",
   {key: "issue_type", label: "Type", width: "90px"},
   {key: "status", label: "Status", width: "100px"},
   {key: "components", label: "Components", width: "140px"},
@@ -610,15 +689,59 @@ def generate_html(data, title, projects_str, months, usage_info=None,
 var currentPage = 1, pageSize = 50;
 var filteredData = TABLE_DATA.slice();
 var colFilters = {};
+var currentViewFilter = HAS_BOTS ? "human" : "all";
 
 function escapeHtml(s) {
   if (s == null) return "";
   return String(s).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;");
 }
 
+function switchView(view) {
+  currentViewFilter = view;
+  // Update toggle buttons
+  var btns = document.querySelectorAll('.toggle-btn');
+  for (var i = 0; i < btns.length; i++) {
+    var btn = btns[i];
+    if (btn.getAttribute('data-view') === view) {
+      btn.classList.add('active');
+    } else {
+      btn.classList.remove('active');
+    }
+  }
+  // Show/hide sankeys and summaries
+  var views = ['human', 'bot', 'all'];
+  for (var v = 0; v < views.length; v++) {
+    var vName = views[v];
+    var sankey = document.getElementById('sankey-' + vName);
+    var summary = document.getElementById('summary-' + vName);
+    if (sankey) {
+      sankey.style.display = vName === view ? 'block' : 'none';
+      // Render pending sankey on first show
+      if (vName === view) {
+        var pending = sankey.querySelector('[data-pending="true"]');
+        if (pending) {
+          pending.removeAttribute('data-pending');
+          // Re-render by re-running the sankey script
+          var scripts = sankey.querySelectorAll('script');
+          for (var s = 0; s < scripts.length; s++) {
+            var newScript = document.createElement('script');
+            newScript.textContent = scripts[s].textContent;
+            scripts[s].parentNode.replaceChild(newScript, scripts[s]);
+          }
+        }
+      }
+    }
+    if (summary) summary.style.display = vName === view ? 'block' : 'none';
+  }
+  applyFilters();
+}
+
 function applyFilters() {
   var globalTerm = document.getElementById("global-search").value.toLowerCase();
   filteredData = TABLE_DATA.filter(function(row) {
+    // View filter (human/bot/all)
+    if (currentViewFilter === "human" && row.is_bot) return false;
+    if (currentViewFilter === "bot" && !row.is_bot) return false;
     // Global search
     if (globalTerm) {
       var match = false;
@@ -632,8 +755,13 @@ def generate_html(data, title, projects_str, months, usage_info=None,
     // Column filters
     for (var col in colFilters) {
       if (!colFilters[col]) continue;
-      var val = String(row[col] || "").toLowerCase();
-      if (val.indexOf(colFilters[col].toLowerCase()) < 0) return false;
+      if (col === "is_bot") {
+        var expected = colFilters[col].toLowerCase() === "bot";
+        if (row.is_bot !== expected) return false;
+      } else {
+        var val = String(row[col] || "").toLowerCase();
+        if (val.indexOf(colFilters[col].toLowerCase()) < 0) return false;
+      }
     }
     return true;
   });
@@ -677,6 +805,10 @@ def generate_html(data, title, projects_str, months, usage_info=None,
       } else if (key === "activity_type") {
         var color = ACTIVITY_COLORS[val] || "#9E9E9E";
         html += '<td' + cls + '><span class="color-dot" style="background:' + color + '"></span>' + escapeHtml(val) + '</td>';
+      } else if (key === "is_bot") {
+        var label = val ? "Bot" : "Human";
+        var cssClass = val ? "source-bot" : "source-human";
+        html += '<td' + cls + '><span class="' + cssClass + '">' + label + '</span></td>';
       } else {
         html += '<td' + cls + '>' + escapeHtml(val) + '</td>';
       }
@@ -751,6 +883,12 @@ def generate_html(data, title, projects_str, months, usage_info=None,
         html += '<option value="' + ACTIVITY_TYPES[t] + '">' + ACTIVITY_TYPES[t] + '</option>';
       }
       html += '</select>';
+    } else if (c.key === "is_bot") {
+      html += '<select class="col-filter" data-col="' + c.key + '" style="width:80px;">';
+      html += '<option value="">All</option>';
+      html += '<option value="Human">Human</option>';
+      html += '<option value="Bot">Bot</option>';
+      html += '</select>';
     } else if (c.key === "summary") {
       // Skip — global search covers this
       continue;
@@ -773,11 +911,15 @@ def generate_html(data, title, projects_str, months, usage_info=None,
 }
 
 function updateCount() {
-  var total = TABLE_DATA.length;
+  var viewTotal = TABLE_DATA.filter(function(r) {
+    if (currentViewFilter === "human" && r.is_bot) return false;
+    if (currentViewFilter === "bot" && !r.is_bot) return false;
+    return true;
+  }).length;
   var count = filteredData.length;
-  var isFiltered = count !== total;
+  var isFiltered = count !== viewTotal;
   document.getElementById("row-count").textContent =
-    isFiltered ? count + " of " + total + " issues" : total + " issues";
+    isFiltered ? count + " of " + viewTotal + " issues" : viewTotal + " issues";
   var hint = document.getElementById("jql-hint");
   if (!isFiltered && count > JQL_URL_KEY_LIMIT) {
     hint.textContent = "Filter the table first, or use Copy JQL for large sets";
diff --git a/plugins/snowflake/scripts/sample_and_estimate.py b/plugins/snowflake/scripts/sample_and_estimate.py
index c4578404b..9c2b23701 100644
--- a/plugins/snowflake/scripts/sample_and_estimate.py
+++ b/plugins/snowflake/scripts/sample_and_estimate.py
@@ -46,70 +46,82 @@
 ]
 
 
+def _get_is_bot(issue):
+    """Return the issue's bot flag as a bool; reads "IS_BOT" first, then "is_bot" (default False)."""
+    val = issue.get("IS_BOT", issue.get("is_bot", False))
+    if isinstance(val, str):  # string flags: only true/1/yes count, ignoring case and padding
+        return val.strip().lower() in ("true", "1", "yes")
+    return bool(val)  # bools, ints and None fall back to plain truthiness
+
+
 def stratified_sample(issues, sample_size, seed=42):
-    """Draw a stratified random sample proportional to project size.
+    """Draw a stratified random sample proportional to stratum size.
 
-    Ensures every project gets at least 1 issue in the sample (if possible),
+    Stratifies by (project, is_bot) so human and bot populations are both seen.
+    Gives each stratum 1 issue while slots last (never exceeding sample_size),
     then allocates remaining slots proportionally.
     """
     rng = random.Random(seed)
 
-    by_project = {}
+    by_stratum = {}
     for issue in issues:
         proj = issue.get("PROJECT_KEY", issue.get("project_key", "UNKNOWN"))
-        by_project.setdefault(proj, []).append(issue)
+        is_bot = _get_is_bot(issue)
+        stratum = (proj, "bot" if is_bot else "human")  # stratum key is a (project, kind) tuple
+        by_stratum.setdefault(stratum, []).append(issue)
 
     total = len(issues)
     n = min(sample_size, total)
 
     if n >= total:
-        return list(issues), {p: len(v) for p, v in by_project.items()}
+        counts = {}  # whole population requested: report every stratum's full size
+        for s, v in by_stratum.items():
+            counts[s] = len(v)
+        return list(issues), counts
 
-    # Guarantee at least 1 per project, then proportional allocation
+    # Give 1 per stratum (until the n slots run out), then allocate proportionally
     allocations = {}
     remaining = n
-    for proj, proj_issues in by_project.items():
-        allocations[proj] = min(1, len(proj_issues))
-        remaining -= allocations[proj]
+    for stratum, stratum_issues in by_stratum.items():  # cap: strata may outnumber slots
+        allocations[stratum] = min(1, len(stratum_issues), max(0, remaining))
+        remaining -= allocations[stratum]
 
     # Distribute remaining proportionally
     if remaining > 0:
         proportional = {}
-        for proj, proj_issues in by_project.items():
-            proportional[proj] = len(proj_issues) / total * n
-        # Subtract already-allocated minimum
-        for proj in by_project:
-            proportional[proj] = max(0, proportional[proj] - allocations[proj])
-        # Normalize to fill remaining slots
+        for stratum, stratum_issues in by_stratum.items():
+            proportional[stratum] = len(stratum_issues) / total * n
+        for stratum in by_stratum:  # subtract the slot already guaranteed above
+            proportional[stratum] = max(0, proportional[stratum] - allocations[stratum])
         prop_total = sum(proportional.values())
         if prop_total > 0:
-            for proj in by_project:
-                extra = int(proportional[proj] / prop_total * remaining)
-                extra = min(extra, len(by_project[proj]) - allocations[proj])
-                allocations[proj] += extra
+            for stratum in by_stratum:
+                extra = int(proportional[stratum] / prop_total * remaining)
+                extra = min(extra, len(by_stratum[stratum]) - allocations[stratum])
+                allocations[stratum] += extra
                 remaining -= extra
 
-        # Distribute any leftover slots to largest projects
+        # Distribute any leftover slots to largest strata
         if remaining > 0:
-            projects_by_size = sorted(by_project.keys(),
-                                      key=lambda p: len(by_project[p]),
-                                      reverse=True)
-            for proj in projects_by_size:
+            strata_by_size = sorted(by_stratum.keys(),
+                                    key=lambda s: len(by_stratum[s]),
+                                    reverse=True)
+            for stratum in strata_by_size:
                 if remaining <= 0:
                     break
-                can_add = len(by_project[proj]) - allocations[proj]
+                can_add = len(by_stratum[stratum]) - allocations[stratum]
                 add = min(can_add, remaining)
-                allocations[proj] += add
+                allocations[stratum] += add
                 remaining -= add
 
     # Draw samples
     sample = []
     sample_counts = {}
-    for proj, count in allocations.items():
-        proj_issues = by_project[proj]
-        drawn = rng.sample(proj_issues, min(count, len(proj_issues)))
+    for stratum, count in allocations.items():
+        stratum_issues = by_stratum[stratum]
+        drawn = rng.sample(stratum_issues, min(count, len(stratum_issues)))
         sample.extend(drawn)
-        sample_counts[proj] = len(drawn)
+        sample_counts[stratum] = len(drawn)  # may be 0 when slots ran out before this stratum
 
     rng.shuffle(sample)
     return sample, sample_counts
@@ -261,6 +273,9 @@ def main():
     with open(args.input) as f:
         all_issues = json.load(f)
     total = len(all_issues)
+    if total == 0:
+        print("No issues to process.", file=sys.stderr)
+        sys.exit(1)
     print(f"Total issues: {total}")
 
     # Auto-recommend sample size
@@ -287,14 +302,35 @@ def main():
 
         print(f"\nSample drawn: {len(sample)} of {total} issues "
               f"({len(sample)/total*100:.1f}%)")
+
+        # Aggregate stratum counts to project-level for display
+        proj_sample = {}
+        for (proj, bot_status), count in sample_counts.items():
+            proj_sample.setdefault(proj, {"human": 0, "bot": 0})
+            proj_sample[proj][bot_status] = count
+
+        proj_totals = {}
+        for i in all_issues:
+            proj = i.get("PROJECT_KEY", i.get("project_key", "UNKNOWN"))
+            proj_totals[proj] = proj_totals.get(proj, 0) + 1
+
+        has_bots = any(v.get("bot", 0) > 0 for v in proj_sample.values())
+
         print("Stratification by project:")
-        for proj in sorted(sample_counts.keys()):
-            proj_total = sum(1 for i in all_issues
-                            if (i.get("PROJECT_KEY", i.get("project_key"))
-                                == proj))
-            pct = (sample_counts[proj] / proj_total * 100) if proj_total else 0.0
-            print(f"  {proj:<20s} {sample_counts[proj]:>4d} of {proj_total:>5d} "
-                  f"({pct:.1f}%)")
+        for proj in sorted(proj_sample.keys()):
+            proj_total = proj_totals.get(proj, 0)
+            sampled = proj_sample[proj]["human"] + proj_sample[proj]["bot"]
+            pct = (sampled / proj_total * 100) if proj_total else 0.0
+            bot_info = ""
+            if has_bots and proj_sample[proj]["bot"] > 0:
+                bot_info = f"  (human: {proj_sample[proj]['human']}, bot: {proj_sample[proj]['bot']})"
+            print(f"  {proj:<20s} {sampled:>4d} of {proj_total:>5d} "
+                  f"({pct:.1f}%){bot_info}")
+
+        if has_bots:
+            total_bot = sum(v["bot"] for v in proj_sample.values())
+            total_human = sum(v["human"] for v in proj_sample.values())
+            print(f"\n  Total: {total_human} human + {total_bot} bot = {len(sample)} sampled")
 
         print(f"\nSample written to: {args.draw_sample}")
         print("Next: classify this sample with classify_issues.py, "
@@ -308,6 +344,12 @@ def main():
 
         print(f"Classified sample: {len(classified)} issues")
 
+        # Split by bot status
+        human_classified = [i for i in classified if not _get_is_bot(i)]
+        bot_classified = [i for i in classified if _get_is_bot(i)]
+        human_total = sum(1 for i in all_issues if not _get_is_bot(i))
+        bot_total = total - human_total
+
         # Overall estimates
         overall = estimate_distribution(
             classified, confidence=args.confidence, seed=args.seed
@@ -318,6 +360,25 @@ def main():
             classified, confidence=args.confidence, seed=args.seed
         )
 
+        # Human-only and bot-only estimates
+        human_estimates = None
+        bot_estimates = None
+        if human_classified and bot_classified:
+            human_estimates = {
+                "population": human_total,
+                "sample_size": len(human_classified),
+                **estimate_distribution(
+                    human_classified, confidence=args.confidence, seed=args.seed
+                ),
+            }
+            bot_estimates = {
+                "population": bot_total,
+                "sample_size": len(bot_classified),
+                **estimate_distribution(
+                    bot_classified, confidence=args.confidence, seed=args.seed
+                ),
+            }
+
         result = {
             "method": "Dirichlet-Multinomial Bayesian estimation",
             "total_population": total,
@@ -326,6 +387,8 @@ def main():
             "confidence": args.confidence,
             "seed": args.seed,
             "overall": overall,
+            "human": human_estimates,
+            "bot": bot_estimates,
             "by_project": per_project,
         }
 
@@ -346,6 +409,22 @@ def main():
             print(f"{est['category']:<45s} {mean_pct:>5.1f}%  "
                   f"[{lo_pct:>5.1f}% — {hi_pct:>5.1f}%]")
 
+        if human_estimates and bot_estimates:
+            print(f"\nBot/Human Split: {human_total} human + {bot_total} bot "
+                  f"= {total} total")
+            print(f"  Sample: {len(human_classified)} human + "
+                  f"{len(bot_classified)} bot = {len(classified)}")
+
+            print(f"\nHuman Work ({len(human_classified)} of {human_total}):")
+            for est in human_estimates["estimates"]:
+                mean_pct = est["posterior_mean"] * 100
+                print(f"  {est['category']:<45s} {mean_pct:>5.1f}%")
+
+            print(f"\nAutomated/Bot Work ({len(bot_classified)} of {bot_total}):")
+            for est in bot_estimates["estimates"]:
+                mean_pct = est["posterior_mean"] * 100
+                print(f"  {est['category']:<45s} {mean_pct:>5.1f}%")
+
         if args.output:
             os.makedirs(os.path.dirname(os.path.abspath(args.output)),
                         exist_ok=True)
diff --git a/plugins/snowflake/scripts/test_sample_and_estimate.py b/plugins/snowflake/scripts/test_sample_and_estimate.py
new file mode 100644
index 000000000..134f53a88
--- /dev/null
+++ b/plugins/snowflake/scripts/test_sample_and_estimate.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+"""Tests for _get_is_bot() and stratified_sample() in sample_and_estimate.py."""
+
+import unittest
+from sample_and_estimate import _get_is_bot, stratified_sample
+
+
+class TestGetIsBot(unittest.TestCase):
+    """Checks how _get_is_bot() coerces bool, str, int, None, and missing flags."""
+    def test_bool_true(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": True}))
+
+    def test_bool_false(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": False}))
+
+    def test_string_true_lowercase(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": "true"}))
+
+    def test_string_true_uppercase(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": "TRUE"}))
+
+    def test_string_true_mixed_case(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": "True"}))
+
+    def test_string_one(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": "1"}))
+
+    def test_string_yes(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": "yes"}))
+
+    def test_string_false(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": "false"}))  # a non-empty str must not be truthy
+
+    def test_string_zero(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": "0"}))
+
+    def test_string_no(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": "no"}))
+
+    def test_int_one(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": 1}))
+
+    def test_int_zero(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": 0}))
+
+    def test_none_value(self):
+        self.assertFalse(_get_is_bot({"IS_BOT": None}))  # key present, value None
+
+    def test_missing_key_defaults_false(self):
+        self.assertFalse(_get_is_bot({}))
+
+    def test_lowercase_key(self):
+        self.assertTrue(_get_is_bot({"is_bot": True}))
+
+    def test_lowercase_key_string(self):
+        self.assertFalse(_get_is_bot({"is_bot": "false"}))
+
+    def test_uppercase_takes_precedence(self):
+        self.assertTrue(_get_is_bot({"IS_BOT": True, "is_bot": False}))  # "IS_BOT" is read first
+
+
+class TestStratifiedSample(unittest.TestCase):
+    """Checks stratified_sample() sizes, per-stratum counts, and seed determinism."""
+    def _make_issues(self, specs):
+        """Build issue list from (project, is_bot, count) tuples."""
+        issues = []
+        for proj, is_bot, count in specs:
+            for i in range(count):
+                issues.append({
+                    "PROJECT_KEY": proj,
+                    "IS_BOT": is_bot,
+                    "ISSUEKEY": f"{proj}-{i}",
+                })
+        return issues
+
+    def test_sample_size_exceeds_total_returns_all(self):
+        issues = self._make_issues([("A", False, 5)])
+        sample, counts = stratified_sample(issues, 100)
+        self.assertEqual(len(sample), 5)
+        self.assertEqual(counts[("A", "human")], 5)
+
+    def test_sample_equals_total_returns_all(self):
+        issues = self._make_issues([("A", False, 10)])
+        sample, counts = stratified_sample(issues, 10)
+        self.assertEqual((len(sample), counts), (10, {("A", "human"): 10}))
+
+    def test_every_stratum_gets_at_least_one(self):
+        issues = self._make_issues([
+            ("A", False, 100),
+            ("A", True, 100),
+            ("B", False, 5),
+            ("B", True, 3),
+        ])
+        sample, counts = stratified_sample(issues, 10)
+        self.assertEqual(len(sample), 10)
+        self.assertGreaterEqual(counts[("A", "human")], 1)
+        self.assertGreaterEqual(counts[("A", "bot")], 1)
+        self.assertGreaterEqual(counts[("B", "human")], 1)
+        self.assertGreaterEqual(counts[("B", "bot")], 1)
+
+    def test_all_human_no_bot_strata(self):
+        issues = self._make_issues([
+            ("A", False, 50),
+            ("B", False, 50),
+        ])
+        sample, counts = stratified_sample(issues, 20)
+        self.assertEqual(len(sample), 20)
+        self.assertNotIn(("A", "bot"), counts)
+        self.assertNotIn(("B", "bot"), counts)
+        self.assertIn(("A", "human"), counts)
+        self.assertIn(("B", "human"), counts)
+
+    def test_all_bot_no_human_strata(self):
+        issues = self._make_issues([("A", True, 30)])
+        sample, counts = stratified_sample(issues, 10)
+        self.assertEqual(len(sample), 10)
+        self.assertIn(("A", "bot"), counts)
+        self.assertNotIn(("A", "human"), counts)
+
+    def test_proportional_allocation(self):
+        issues = self._make_issues([
+            ("A", False, 900),
+            ("A", True, 100),
+        ])
+        sample, counts = stratified_sample(issues, 100)
+        self.assertEqual(len(sample), 100)
+        self.assertGreater(counts[("A", "human")], counts[("A", "bot")])
+
+    def test_deterministic_with_seed(self):
+        issues = self._make_issues([
+            ("A", False, 50),
+            ("A", True, 50),
+        ])
+        s1, c1 = stratified_sample(issues, 20, seed=123)
+        s2, c2 = stratified_sample(issues, 20, seed=123)
+        self.assertEqual(([i["ISSUEKEY"] for i in s1], c1),
+                         ([i["ISSUEKEY"] for i in s2], c2))
+
+    def test_different_seeds_differ(self):
+        issues = self._make_issues([
+            ("A", False, 100),
+            ("A", True, 100),
+        ])
+        s1, _ = stratified_sample(issues, 20, seed=1)
+        s2, _ = stratified_sample(issues, 20, seed=2)
+        keys1 = set(i["ISSUEKEY"] for i in s1)
+        keys2 = set(i["ISSUEKEY"] for i in s2)
+        self.assertNotEqual(keys1, keys2)
+
+    def test_return_counts_use_tuple_keys(self):
+        issues = self._make_issues([
+            ("PROJ", False, 10),
+            ("PROJ", True, 5),
+        ])
+        _, counts = stratified_sample(issues, 8)
+        for key in counts:
+            self.assertIsInstance(key, tuple)
+            self.assertEqual(len(key), 2)
+
+    def test_lowercase_keys_handled(self):
+        issues = [
+            {"project_key": "X", "is_bot": False, "ISSUEKEY": "X-1"},
+            {"project_key": "X", "is_bot": True, "ISSUEKEY": "X-2"},
+        ]
+        _, counts = stratified_sample(issues, 2)  # strata prove the lowercase keys were read
+        self.assertEqual(counts, {("X", "human"): 1, ("X", "bot"): 1})
+
+
+if __name__ == "__main__":  # allow running this test file directly as a script
+    unittest.main()
diff --git a/plugins/snowflake/skills/setup-snowflake/SKILL.md b/plugins/snowflake/skills/setup-snowflake/SKILL.md
index f829961d8..9f1d978cd 100644
--- a/plugins/snowflake/skills/setup-snowflake/SKILL.md
+++ b/plugins/snowflake/skills/setup-snowflake/SKILL.md
@@ -147,14 +147,14 @@ Then **abort the current command gracefully**. Do not attempt to proceed to Step
 Once the MCP tool is confirmed available, set the database, schema, and role for the session:
 
 ```text
-mcp__snowflake__execute_sql(query="USE ROLE JIRA_CLOUDMARTS_GROUP")
+mcp__snowflake__execute_sql(query="USE ROLE PUBLIC")
 mcp__snowflake__execute_sql(query="USE DATABASE JIRA_DB")
 mcp__snowflake__execute_sql(query="USE SCHEMA CLOUDRHAI_MARTS")
 ```
 
 If any of these fail (e.g., role not granted), inform the user:
 
-> Your Snowflake account does not have the `JIRA_CLOUDMARTS_GROUP` role. This role is required to access Jira data in Snowflake. Please request this role through the access provisioning process at:
+> Your Snowflake account could not switch to the `PUBLIC` role or access `JIRA_DB.CLOUDRHAI_MARTS`. Please request access through the access provisioning process at:
 >
 > **https://dataverse.pages.redhat.com/data-docs/data-users/**