diff --git a/PLUGINS.md b/PLUGINS.md index 4191fb1ed..5d99b1eb3 100644 --- a/PLUGINS.md +++ b/PLUGINS.md @@ -78,6 +78,7 @@ Tools for working with OpenShift CI and analyzing Prow job results - **`/ci:ask-sippy` `[question]`** - Ask the Sippy AI agent questions about OpenShift CI payloads, jobs, and test results - **`/ci:check-if-jira-regression-is-ongoing` ``** - Check if the regression described in a Jira bug is still ongoing or has resolved - **`/ci:continue-session` ``** - Download and continue a Claude session from a Prow CI job's artifacts +- **`/ci:detect-backport-regressions` `[--current-release ] [--lookback N] [--days N]`** - Detect regressions that have cascaded from newer releases to older releases due to problematic backports - **`/ci:extract-kubeconfig` ``** - Extract kubeconfig from a running CI job in a PR - **`/ci:extract-prow-job-must-gather` `prowjob-url`** - Extract and decompress must-gather archives from Prow job artifacts - **`/ci:fetch-payloads` `[architecture] [version] [stream]`** - Fetch recent release payloads from the OpenShift release controller diff --git a/docs/data.json b/docs/data.json index 4b6add627..f7b60c858 100644 --- a/docs/data.json +++ b/docs/data.json @@ -359,6 +359,12 @@ "name": "continue-session", "synopsis": "/ci:continue-session " }, + { + "argument_hint": "[--current-release ] [--lookback N] [--days N]", + "description": "Detect regressions that have cascaded from newer releases to older releases due to problematic backports", + "name": "detect-backport-regressions", + "synopsis": "/ci:detect-backport-regressions [--current-release ] [--lookback N] [--days N] [--exclude-install] [--exclude-monitor] [--component ] [--min-cascade N] [--include-resolved]" + }, { "argument_hint": "", "description": "Extract kubeconfig from a running CI job in a PR", @@ -449,6 +455,11 @@ "hooks": [], "name": "ci", "skills": [ + { + "description": "Perform deep root cause analysis on potential cascades to confirm they are real backport 
cascades with the same underlying issue", + "id": "analyze-cascade-similarity", + "name": "Analyze Cascade Similarity" + }, { "description": "Analyze and compare disruption across one or more Prow CI job runs by examining interval data, audit logs, pod logs, and CPU metrics", "id": "analyze-disruption", @@ -459,6 +470,16 @@ "id": "analyze-payload", "name": "Analyze Payload" }, + { + "description": "Detect regressions that have cascaded from newer releases to older releases due to problematic backports", + "id": "detect-backport-regressions", + "name": "Detect Backport Regressions" + }, + { + "description": "Identify regressions that appear across multiple releases by matching test names and checking temporal ordering", + "id": "detect-potential-cascades", + "name": "Detect Potential Cascades" + }, { "description": "Fetch JIRA issue details including status, assignee, comments, and progress classification", "id": "fetch-jira-issue", @@ -509,6 +530,11 @@ "id": "fetch-test-runs", "name": "Fetch Test Runs" }, + { + "description": "Generate interactive HTML, Markdown, or JSON reports from confirmed cascade data with similarity analysis results", + "id": "generate-cascade-report", + "name": "Generate Cascade Report" + }, { "description": "Helper skill to retrieve OAuth tokens from the correct OpenShift cluster context when multiple clusters are configured", "id": "oc-auth", diff --git a/plugins/ci/.claude-plugin/plugin.json b/plugins/ci/.claude-plugin/plugin.json index e8f93194a..9d5c6014e 100644 --- a/plugins/ci/.claude-plugin/plugin.json +++ b/plugins/ci/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "ci", "description": "Tools for working with OpenShift CI and analyzing Prow job results", - "version": "0.0.38", + "version": "0.0.39", "author": { "name": "openshift" } diff --git a/plugins/ci/commands/detect-backport-regressions.md b/plugins/ci/commands/detect-backport-regressions.md new file mode 100644 index 000000000..7212e443a --- /dev/null +++ 
b/plugins/ci/commands/detect-backport-regressions.md @@ -0,0 +1,251 @@ +--- +description: Detect regressions that have cascaded from newer releases to older releases due to problematic backports +argument-hint: "[--current-release ] [--lookback N] [--days N]" +--- + +## Name + +ci:detect-backport-regressions + +## Synopsis + +``` +/ci:detect-backport-regressions [--current-release ] [--lookback N] [--days N] [--exclude-install] [--exclude-monitor] [--component ] [--min-cascade N] [--include-resolved] +``` + +## Description + +The `ci:detect-backport-regressions` command identifies regressions that have cascaded backward from the current development release to older stable releases due to problematic backports. + +**Problem Pattern**: When a regression is discovered in the active development branch (e.g., 4.22-main) and a bug is filed, the underlying code change that caused the regression may be backported to previous release branches. This creates a cascading effect where the same regression appears in older releases over time. + +**Real-World Example**: [OCPBUGS-75200](https://issues.redhat.com/browse/OCPBUGS-75200) - A systemd change from the MCO team exposed a problem with kube-apiserver graceful termination. The regression was found in 4.22, a bug was filed, but the cause was backported all the way to 4.18 before anyone noticed. + +### Detection Strategy + +The command: +1. Fetches open regressions from the current development release +2. Walks backward through previous N releases (n-1, n-2, n-3, etc.) +3. Identifies matching test failures (by test name) that appeared AFTER the dev branch regression +4. **Analyzes test failure similarity** using `ci:analyze-prow-job-test-failure` to confirm failures are happening for the **same reason** +5. Filters out false positives where the same test fails for different reasons +6. Highlights cases where a triaged regression is spreading backward with the same root cause +7. 
Provides actionable alerts to halt further backports + +### Key Features + +- **Test name matching**: Matches regressions across releases by test name to identify potential cascades +- **Similarity analysis**: Uses `ci:analyze-prow-job-test-failure` to compare actual error messages and root causes +- **Temporal analysis**: Detects when older release regressions appeared AFTER dev branch regression +- **False positive filtering**: Excludes cases where the same test fails for different, unrelated reasons +- **Triage linkage**: Prioritizes regressions that already have JIRA bugs filed +- **Smart filtering**: Excludes installation/infrastructure noise and Monitor tests by default + - Monitor tests are invariant checks that fail for many different reasons + - Excluding them reduces typical cascade count from ~25 to ~6, focusing on real functional failures +- **Actionable output**: Provides specific recommendations to halt problematic backports with similarity evidence + +## Implementation + +This command orchestrates three focused skills in sequence: + +### Step 1: Detect Potential Cascades + +Load the **"Detect Potential Cascades"** skill (`detect-potential-cascades`): + +1. Auto-detect the current development release or use provided `--current-release` +2. Calculate the list of previous releases to scan based on `--lookback` parameter +3. Fetch regression data for all releases using `teams:list-regressions` skill +4. Match regressions by test name across releases +5. Check temporal ordering (older release regression appeared AFTER dev release) +6. Calculate severity based on cascade extent and triage status +7. Output potential cascades to `.work/detect-backport-regressions/potential_cascades.json` + +### Step 2: Analyze Cascade Similarity + +Load the **"Analyze Cascade Similarity"** skill (`analyze-cascade-similarity`): + +1. Read potential cascades from Step 1 +2. For each cascade, fetch Prow job URLs using `ci:fetch-regression-details` +3. 
Launch parallel Task agents to analyze each failure with `ci:prow-job-analyze-test-failure --fast` +4. Extract ANALYSIS_RESULT blocks from each agent +5. Compare root causes across releases (components, error patterns, summaries) +6. Calculate similarity scores and determine if same root cause +7. Output confirmed cascades and false positives to `.work/detect-backport-regressions/confirmed_cascades.json` + +### Step 3: Generate Report + +Load the **"Generate Cascade Report"** skill (`generate-cascade-report`): + +1. Read confirmed cascades from Step 2 +2. Generate report in requested format (HTML, Markdown, or JSON) +3. Include similarity analysis results and visualizations +4. Save HTML report to current working directory or output to stdout + +## Return Value +**Converting `test_details_url` to UI URL**: The `test_details_url` from the API is an API endpoint not suitable for display or bug reports. Convert it to the UI URL by replacing the base path. The query parameters are identical: + + ```bash + # Convert API URL to UI URL + test_details_ui_url=$(echo "$test_details_url" | sed 's|https://sippy.dptools.openshift.org/api/component_readiness/test_details|https://sippy-auth.dptools.openshift.org/sippy-ng/component_readiness/test_details|') + ``` + + Always use the converted `test_details_ui_url` when displaying the link in the report or including it in bug descriptions. + + See `plugins/ci/skills/fetch-regression-details/SKILL.md` for complete implementation details. 
+ +**Default Output**: Interactive HTML report saved to current working directory + +**Available Formats**: +- **`--format html`** (default): Generates attractive, self-contained HTML file saved to current working directory +- **`--format markdown`**: Prints markdown report to stdout +- **`--format json`**: Prints JSON data to stdout for automation + +### HTML Report Features + +The default HTML report is an interactive, self-contained file generated with: +- **Executive Summary Dashboard**: Color-coded severity cards with total cascade counts +- **Collapsible Sections**: Click to expand/collapse each regression (critical/high auto-expanded) +- **Color-Coded Severity**: Visual indicators (red=critical, orange=high, yellow=medium, blue=low) +- **Interactive Tables**: Hover effects and clear data presentation +- **Embedded Links**: Direct links to Sippy regression details and JIRA bugs +- **Similarity Analysis**: Visual checkmarks showing confirmed matches across releases +- **Responsive Design**: Works on desktop and mobile browsers +- **Self-Contained**: All CSS and JavaScript embedded, no external dependencies + +**Report Contents**: +- Executive summary with cascade statistics +- Per-regression cascade timelines showing progression from dev → older releases +- **Similarity analysis results** showing how failures match across releases +- Sample error messages and root causes from each release +- JIRA bug links and triage information +- Severity classification (CRITICAL/HIGH/MEDIUM/LOW) +- Recommended actions (halt specific backports, review recent changes) +- Component analysis showing which teams are affected + +**Severity Levels**: +- **CRITICAL**: Triaged regression cascaded to 3+ older releases +- **HIGH**: Triaged regression cascaded to 2 older releases +- **MEDIUM**: Triaged regression cascaded to 1 older release +- **LOW**: Untriaged regression showing cascade pattern + +## Examples + +1. 
**Basic scan of current dev release and last 4 releases**: + ``` + /ci:detect-backport-regressions + ``` + +2. **Focus on specific component with extended lookback**: + ``` + /ci:detect-backport-regressions --component kube-apiserver --lookback 6 + ``` + +3. **Only show cascades that reached 2+ older releases (HIGH or CRITICAL when triaged)**: + ``` + /ci:detect-backport-regressions --min-cascade 2 + ``` + +4. **Include install failures, scan last 60 days**: + ``` + /ci:detect-backport-regressions --exclude-install false --days 60 + ``` + +5. **Manual release override**: + ``` + /ci:detect-backport-regressions --current-release 4.22 --lookback 3 + ``` + +6. **Show historical cascade patterns including resolved regressions**: + ``` + /ci:detect-backport-regressions --include-resolved + ``` + +7. **Generate markdown report to stdout**: + ``` + /ci:detect-backport-regressions --format markdown --days 45 + ``` + +8. **Include Monitor tests (not recommended)**: + ``` + /ci:detect-backport-regressions --exclude-monitor false + ``` + Note: This will include Monitor/invariant tests which often create false positives + +## Arguments + +- `--current-release ` (optional): Override auto-detection of current dev release + - Default: Auto-detect from Sippy API + - Format: "4.22" + +- `--lookback N` (optional): Number of previous releases to scan + - Default: 4 (e.g., scan 4.21, 4.20, 4.19, 4.18) + - Range: 1-6 + +- `--days N` (optional): Time window for cascade detection + - Default: 30 days + - Only flag older release regressions that appeared within last N days + +- `--exclude-install` (optional): Exclude installation/infrastructure failures + - Default: true + - Set to `false` to include all failure types + +- `--exclude-monitor` (optional): Exclude Monitor/invariant tests + - Default: true + - Set to `false` to include Monitor tests + - Monitor tests are test framework invariant checks like `[Monitor:pod-network-availability]` + - These tests fail for many different platform-specific and transient reasons + - 
Excluding them reduces typical cascade count from ~25 to ~6, focusing on functional tests + - **Recommended**: Keep this enabled (default) for most use cases + +- `--component ` (optional): Focus on specific component + - Example: `--component kube-apiserver` + - Uses fuzzy matching (same as list-regressions) + +- `--min-cascade N` (optional): Minimum number of releases cascade must affect + - Default: 1 + - Example: `--min-cascade 2` only shows regressions in 2+ older releases + +- `--format ` (optional): Output format + - Default: html (saves interactive report to current working directory as `backport-regression-report_YYYYMMDD_HHMMSS.html`) + - `markdown`: Plain text markdown report printed to stdout + - `json`: Machine-readable output for automation (prints to stdout) + +- `--include-resolved` (optional): Include resolved (closed) regressions in stable releases + - Default: false (only show active cascades) + - Set to `true` to see historical cascade patterns even if resolved + - Note: Development release always includes both open and closed to find origin regressions + +- `--analyze-similarity ` (optional): Enable automated similarity analysis + - Default: true + - When enabled, analyzes test failure outputs to confirm same root cause + - Filters out false positives where same test fails for different reasons + - Use `--analyze-similarity false` to skip similarity analysis and only match by test name + +- `--similarity-threshold N` (optional): Minimum similarity score (0.0-1.0) + - Default: 0.6 + - Only used when `--analyze-similarity` is enabled + - Higher values require stricter matching (fewer false positives, more false negatives) + +## Skills Used + +This command orchestrates three main skills: + +1. **`detect-potential-cascades`** - Identifies potential cascades by test name matching + - Uses: `ci:fetch-releases`, `teams:list-regressions` + - Output: `.work/detect-backport-regressions/potential_cascades.json` + +2. 
**`analyze-cascade-similarity`** - Performs root cause analysis and similarity comparison + - Uses: `ci:fetch-regression-details`, `ci:prow-job-analyze-test-failure` (via parallel Task agents) + - Output: `.work/detect-backport-regressions/confirmed_cascades.json` + - Skipped if `--analyze-similarity false` is specified + +3. **`generate-cascade-report`** - Generates final report in requested format + - Output: HTML file, Markdown to stdout, or JSON to stdout + +## Prerequisites + +1. **Python 3.6+**: Required to run the detection script +2. **Network access**: Must reach Sippy API endpoints +3. **Installed plugins**: + - `ci` plugin (for fetch-releases skill) + - `teams` plugin (for list-regressions skill) diff --git a/plugins/ci/skills/analyze-cascade-similarity/SKILL.md b/plugins/ci/skills/analyze-cascade-similarity/SKILL.md new file mode 100644 index 000000000..76363ac45 --- /dev/null +++ b/plugins/ci/skills/analyze-cascade-similarity/SKILL.md @@ -0,0 +1,445 @@ +--- +name: Analyze Cascade Similarity +description: Perform deep root cause analysis on potential cascades to confirm they are real backport cascades with the same underlying issue +--- + +# Analyze Cascade Similarity + +This skill performs deep root cause analysis on potential cascades identified by the `detect-potential-cascades` skill. It fetches actual Prow job failures, launches parallel subagents to analyze each failure, and compares the root causes to determine if cascades are real or false positives. + +## When to Use This Skill + +Use this skill when you need to: + +- Confirm that potential cascades are real cascades with the same root cause +- Filter out false positives where the same test fails for different reasons +- Get detailed root cause analysis for each release in a cascade +- Generate similarity scores and comparison data for cascade validation + +## Prerequisites + +1. 
# Fetch one representative failed Prow job URL per release of a cascade and
# store each one in the cascade's work directory.
#
# Expected variables (taken from the cascade parsed above):
#   cascade_index          index of the cascade being processed
#   origin_release         cascade["origin"]["release"], e.g. 4.22
#   origin_regression_id   cascade["origin"]["regression_id"]
#   cascade_releases       array of older releases in the cascade
#   cascade_regression_ids associative array: release -> regression_id
#
# When a regression has no sample failed jobs yet, the file contains the
# literal string "null"; Step 3 skips those releases.

work_dir=".work/detect-backport-regressions/cascade_${cascade_index}"
# The redirect below fails if the per-cascade directory does not exist yet.
mkdir -p "$work_dir"

# fetch_prow_url REGRESSION_ID RELEASE
# Writes the first available failed-run URL to ${work_dir}/${RELEASE}_prow_url.txt.
fetch_prow_url() {
  local regression_id="$1"
  local release="$2"

  # `// {}` keeps jq from erroring when sample_failed_jobs is null or absent;
  # select(. != null) skips jobs that have no failed run recorded so that a
  # usable URL from a later job is not hidden by an earlier empty one.
  python3 plugins/ci/skills/fetch-regression-details/fetch_regression_details.py \
    "$regression_id" \
    --format json \
    | jq -r '[(.sample_failed_jobs // {}) | to_entries[].value.failed_runs[0].job_url | select(. != null)] | .[0]' \
    > "${work_dir}/${release}_prow_url.txt"
}

# Origin (development) release -- named after the actual release rather than
# a hard-coded 4.22.
fetch_prow_url "$origin_regression_id" "$origin_release"

# Every older release the regression cascaded into.
for release in "${cascade_releases[@]}"; do
  fetch_prow_url "${cascade_regression_ids[$release]}" "$release"
done
+**Handle cases where no Prow URL is available**: +- Some regressions may not have `sample_failed_jobs` yet +- Log a warning and mark that release as "unable to analyze" +- Continue with other releases that have URLs + +### Step 3: Launch Parallel Subagents for Root Cause Analysis + +**CRITICAL**: Launch subagents **in parallel** (single message with multiple Task tool calls) for maximum performance. + +**Subagent Prompt Template**: + +``` +Analyze the test failure "{test_name}" in this Prow job: {prow_url} + +Use the ci:prow-job-analyze-test-failure skill to perform a thorough root cause analysis. + +IMPORTANT: +- Trace to the actual root cause, not just symptoms +- Download log bundles, examine pod logs, cite specific error messages +- Never stop at symptoms like "0 nodes ready", "operator degraded", or "crash-looping" +- Look for the underlying reason (network issues, CNI config loss, registry failures, etc.) + +At the end of your analysis, you MUST provide an ANALYSIS_RESULT block in this exact format: + +ANALYSIS_RESULT: +- root_cause_summary: +- affected_components: +- key_error_patterns: +- known_symptoms: +- test_name: {test_name} +- confidence_level: <1-5, where 5 is highest confidence in the root cause> +- release: {release} +``` + +**Launch agents in parallel**: + +```python +# Build list of (release, prow_url) tuples +analysis_tasks = [ + ("4.22", prow_url_422), + ("4.21", prow_url_421), + ("4.20", prow_url_420), + # ... 
import re
from typing import Optional

# Keys owned by the caller; a field reported by the agent must not replace them.
_RESERVED_KEYS = ("release", "raw_response")


def extract_analysis_result(agent_response: str, release: str) -> dict:
    """Extract the structured ANALYSIS_RESULT block from a subagent response.

    Args:
        agent_response: Full text returned by the analysis subagent.
        release: Release the subagent analyzed (e.g. "4.22").

    Returns:
        A dict holding ``release``, ``raw_response`` and one string entry per
        ``- key: value`` bullet of the block. When the response has no
        ANALYSIS_RESULT block, a dict with ``error``, ``release`` and
        ``confidence_level`` 0 is returned instead.
    """
    marker = "ANALYSIS_RESULT:"
    if marker not in agent_response:
        return {
            "error": "No ANALYSIS_RESULT block found",
            "release": release,
            "confidence_level": 0,
        }

    # The prompt asks for the block at the END of the response, so parse the
    # text after the last marker; an earlier mention of the marker in prose
    # would otherwise hide the real block.
    _, _, block = agent_response.rpartition(marker)

    result = {"release": release, "raw_response": agent_response}
    for raw_line in block.splitlines():
        line = raw_line.strip()  # tolerate indented bullets
        if not line.startswith("- "):
            continue
        key, separator, value = line[2:].partition(":")
        key = key.strip()
        # Results are keyed by the caller's release, so the agent's own
        # "release" field must not replace it.
        if not separator or not key or key in _RESERVED_KEYS:
            continue
        result[key] = value.strip()

    return result


def collect_analysis_results(completed_agents) -> dict:
    """Map each release to the parsed ANALYSIS_RESULT of its subagent.

    Each item of ``completed_agents`` must provide "release" and "response".
    """
    return {
        agent["release"]: extract_analysis_result(agent["response"], agent["release"])
        for agent in completed_agents
    }


def _parse_confidence(value) -> int:
    """Return the first integer found in ``value``; 0 if there is none."""
    found = re.search(r"\d+", str(value))
    return int(found.group()) if found else 0


def _split_items(value) -> set:
    """Split a comma-separated field into a set of lowercase, non-empty items."""
    if isinstance(value, (list, tuple, set)):
        parts = [str(item) for item in value]
    else:
        parts = str(value or "").split(",")
    return {part.strip().lower() for part in parts if part.strip()}


def compare_two_results(result1: dict, result2: dict, threshold: float = 0.6) -> dict:
    """Compare two analysis results and score how alike their root causes are.

    Args:
        result1: Parsed ANALYSIS_RESULT of the origin release.
        result2: Parsed ANALYSIS_RESULT of the release compared against it.
        threshold: Minimum similarity score required to call the root causes
            the same.

    Returns:
        A dict with ``same_root_cause``, ``similarity_score`` and
        ``low_confidence``. Low-confidence comparisons also carry ``reason``;
        all others carry ``component_overlap``, ``pattern_similarity`` and
        ``summary_overlap``.
    """
    conf1 = _parse_confidence(result1.get("confidence_level", 0))
    conf2 = _parse_confidence(result2.get("confidence_level", 0))

    if conf1 < 3 or conf2 < 3:
        # Not enough evidence either way: flag it so the caller can route the
        # cascade to manual review instead of treating it as a false positive.
        return {
            "same_root_cause": False,
            "similarity_score": 0.0,
            "low_confidence": True,
            "reason": "Low confidence in one or both analyses",
        }

    # Affected components: Jaccard overlap. Empty fields produce empty sets,
    # so missing data never counts as agreement.
    components1 = _split_items(result1.get("affected_components", ""))
    components2 = _split_items(result2.get("affected_components", ""))
    component_overlap = len(components1 & components2) / max(len(components1 | components2), 1)

    # Error patterns: fuzzy match, one pattern containing the other.
    patterns1 = _split_items(result1.get("key_error_patterns", ""))
    patterns2 = _split_items(result2.get("key_error_patterns", ""))
    pattern_matches = sum(
        1
        for p1 in patterns1
        if any(p1 in p2 or p2 in p1 for p2 in patterns2)
    )
    pattern_similarity = pattern_matches / max(len(patterns1), len(patterns2), 1)

    # Root cause summaries: simple keyword overlap.
    words1 = set(str(result1.get("root_cause_summary", "")).lower().split())
    words2 = set(str(result2.get("root_cause_summary", "")).lower().split())
    summary_overlap = len(words1 & words2) / max(len(words1 | words2), 1)

    similarity_score = (
        component_overlap * 0.4
        + pattern_similarity * 0.4
        + summary_overlap * 0.2
    )

    # A high score alone is not enough; the components must overlap too.
    same_root_cause = similarity_score >= threshold and component_overlap > 0.3

    return {
        "same_root_cause": same_root_cause,
        "similarity_score": similarity_score,
        "low_confidence": False,
        "component_overlap": component_overlap,
        "pattern_similarity": pattern_similarity,
        "summary_overlap": summary_overlap,
    }


def compare_root_causes(
    results: dict,
    origin_release: Optional[str] = None,
    threshold: float = 0.6,
) -> dict:
    """Compare analysis results across releases against the origin release.

    Args:
        results: Mapping of release -> parsed ANALYSIS_RESULT.
        origin_release: Release where the regression originated. Defaults to
            the first key of ``results``.
        threshold: Similarity threshold passed to ``compare_two_results``.

    Returns:
        {
            "same_root_cause": bool,       # True only if every release matches
            "similarity_score": float,     # average over all comparisons
            "comparisons": [...],          # one labelled entry per release
            "origin_release": str,
            "needs_manual_review": bool    # a comparison was low confidence
        }
        With fewer than two results, or an origin that is not in ``results``,
        a dict with ``same_root_cause`` False, ``similarity_score`` 0.0 and a
        ``reason`` is returned.
    """
    if len(results) < 2:
        return {
            "same_root_cause": False,
            "similarity_score": 0.0,
            "reason": "Insufficient data for comparison",
        }

    if origin_release is None:
        origin_release = next(iter(results))
    if origin_release not in results:
        return {
            "same_root_cause": False,
            "similarity_score": 0.0,
            "reason": f"Origin release {origin_release} has no analysis result",
        }
    origin_result = results[origin_release]

    comparisons = []
    for release, result in results.items():
        if release == origin_release:
            continue
        comparison = compare_two_results(origin_result, result, threshold)
        comparison["release"] = release  # label for per-release reporting
        comparisons.append(comparison)

    avg_similarity = sum(c["similarity_score"] for c in comparisons) / len(comparisons)

    return {
        "same_root_cause": all(c["same_root_cause"] for c in comparisons),
        "similarity_score": avg_similarity,
        "comparisons": comparisons,
        "origin_release": origin_release,
        "needs_manual_review": any(c["low_confidence"] for c in comparisons),
    }
], + "false_positives": [ + { + "test_name": "[sig-arch][Feature:ClusterUpgrade] Cluster should remain functional", + "severity": "CRITICAL", + "same_root_cause": false, + "similarity_score": 0.15, + "reason": "Different root causes across releases", + "origin": { + "release": "4.22", + "root_cause_summary": "Local registry ImagePullBackOff on bare metal" + }, + "cascade_releases": [ + { + "release": "4.21", + "root_cause_summary": "CNI configuration loss during upgrade", + "similarity_to_origin": 0.10 + }, + { + "release": "4.20", + "root_cause_summary": "External quay.io registry HTTP 502 errors", + "similarity_to_origin": 0.20 + } + ], + "analysis_by_release": { + "4.22": { /* full ANALYSIS_RESULT */ }, + "4.21": { /* full ANALYSIS_RESULT */ }, + "4.20": { /* full ANALYSIS_RESULT */ } + } + } + ], + "unable_to_analyze": [ + { + "test_name": "...", + "reason": "No Prow job URLs available" + } + ] +} +``` + +**Save to file**: + +```bash +cat > .work/detect-backport-regressions/confirmed_cascades.json <<< "$output_json" +``` + +## Output Format + +The skill outputs JSON to stdout and saves it to `.work/detect-backport-regressions/confirmed_cascades.json`. + +**Key sections**: +- `confirmed_cascades`: Cascades with `same_root_cause: true` +- `false_positives`: Cascades with `same_root_cause: false` +- `unable_to_analyze`: Cascades missing data for analysis + +## Similarity Threshold + +The default similarity threshold is **0.6** (60% similarity required). + +Adjust the threshold based on: +- **Higher threshold (0.7-0.8)**: Fewer false positives, more false negatives +- **Lower threshold (0.4-0.5)**: More false positives, fewer false negatives + +## Performance Considerations + +**Parallel Execution**: This skill can be time-intensive because it launches multiple subagents. To maximize performance: + +1. **Launch all subagents in a single message** with multiple Task tool calls +2. **Limit cascade analysis**: Use `--min-cascade 2` to focus on more severe cases +3. 
**Batch processing**: Process cascades in groups if there are many + +**Typical execution times**: +- 1 cascade (5 releases): ~3-5 minutes +- 5 cascades (avg 3 releases each): ~10-15 minutes +- 10+ cascades: Consider running in batches + +## Error Handling + +**Missing Prow URLs**: +- Some regressions may not have sample failed jobs +- Mark as "unable_to_analyze" and continue with others + +**Subagent failures**: +- If a subagent fails to analyze a job, mark that release as low confidence +- Can still compare remaining releases + +**Low confidence results**: +- If confidence_level < 3, flag for manual review +- Don't automatically classify as false positive + +## Example Usage + +This skill is typically invoked after `detect-potential-cascades`: + +```bash +# Step 1: Detect potential cascades +# (Skill: detect-potential-cascades) + +# Step 2: Analyze similarity (this skill) +# Agent reads this skill and executes the steps above + +# Step 3: Generate report +# (Skill: generate-cascade-report) +``` + +## See Also + +- `detect-potential-cascades` - Identify potential cascades by test name matching +- `generate-cascade-report` - Generate HTML/Markdown reports from confirmed cascades +- `ci:fetch-regression-details` - Get Prow job URLs from regression IDs +- `ci:prow-job-analyze-test-failure` - Analyze individual Prow job failures diff --git a/plugins/ci/skills/detect-backport-regressions/SKILL.md b/plugins/ci/skills/detect-backport-regressions/SKILL.md new file mode 100644 index 000000000..c0c319371 --- /dev/null +++ b/plugins/ci/skills/detect-backport-regressions/SKILL.md @@ -0,0 +1,301 @@ +--- +name: Detect Backport Regressions +description: Detect regressions that have cascaded from newer releases to older releases due to problematic backports +--- + +# Detect Backport Regressions + +**NOTE: This skill has been refactored into three focused sub-skills for better modularity and maintainability.** + +## New Skill-Based Implementation + +The backport regression 
detection functionality has been split into three focused skills: + +1. **`detect-potential-cascades`** - Identifies potential cascades by test name matching + - Fast, test-name-only matching + - Temporal ordering analysis + - Severity calculation + +2. **`analyze-cascade-similarity`** - Performs deep root cause analysis + - Fetches Prow job URLs + - Launches parallel subagents for failure analysis + - Compares root causes to confirm real cascades vs false positives + +3. **`generate-cascade-report`** - Generates final reports + - Interactive HTML reports + - Markdown and JSON output + - Similarity visualization + +## When to Use This Skill + +Use the `/ci:detect-backport-regressions` command, which orchestrates all three sub-skills automatically. + +Alternatively, you can use the individual skills if you need just one step: + +- Use `detect-potential-cascades` alone for fast test-name matching without similarity analysis +- Use `analyze-cascade-similarity` to analyze existing potential_cascades.json +- Use `generate-cascade-report` to regenerate reports from existing data + +## Prerequisites + +1. **Network Access**: Must be able to reach Sippy API endpoints and GCS (for Prow artifacts) +2. 
**Installed plugins**: + - `ci` plugin (for fetch-releases, fetch-regression-details, prow-job-analyze-test-failure) + - `teams` plugin (for list-regressions) + +## Quick Start + +**Recommended**: Use the command which handles all orchestration: + +```bash +/ci:detect-backport-regressions --days 45 --min-cascade 2 +``` + +**Advanced**: Use individual skills for more control: + +```bash +# Step 1: Detect potential cascades (fast) +# Load skill: detect-potential-cascades +# Parameters: --current-release 4.22 --lookback 4 --days 45 + +# Step 2: Analyze similarity (slow, thorough) +# Load skill: analyze-cascade-similarity +# Reads: .work/detect-backport-regressions/potential_cascades.json + +# Step 3: Generate report +# Load skill: generate-cascade-report --format html +# Reads: .work/detect-backport-regressions/confirmed_cascades.json +``` + +### Step 2: Analyze Test Failure Similarity with Parallel Subagents + +**IMPORTANT**: This skill **always** launches **parallel subagents** to perform deep failure analysis for each release version, then compares the root causes to determine if the cascade is real or a false positive. + +**How Similarity Analysis Works**: + +For each potential cascade (a test that fails in both the dev release and one or more older releases): + +1. **Fetch Prow job URLs** for representative failures in each release: + - Use `fetch-regression-details` to get the test_id + - Use `fetch-test-runs` to get failed job runs for that test in each release + - Extract the most recent Prow job URL for each release + +2. **Launch parallel subagents** to analyze each failure independently: + - Launch one subagent per release (dev release + all cascade releases) + - All subagents run in parallel for maximum speed + - Each subagent receives the prompt: `"Analyze the test failure {test_name} in this Prow job: {prow_url}"` + - This prompt format triggers the `ci:prow-job-analyze-test-failure` skill + +3. 
**Subagent analysis requirements**: + - **Trace to root cause**: Never stop at symptoms like "0 nodes ready", "operator degraded", or "crash-looping". Download log bundles, pod logs, and container previous logs. Cite specific error messages. + - **Return structured results**: Each subagent MUST include an `ANALYSIS_RESULT` block at the end: + ``` + ANALYSIS_RESULT: + - root_cause_summary: + - affected_components: + - key_error_patterns: + - known_symptoms: + - test_name: + - confidence_level: <1-5, where 5 is highest confidence in the root cause> + ``` + +4. **Compare analysis results** across releases: + - Extract the `ANALYSIS_RESULT` block from each subagent response + - Compare `root_cause_summary`, `affected_components`, and `key_error_patterns` + - Determine similarity: + - **Same root cause**: Components and error patterns match (even if timestamps/UUIDs differ) + - **Different root cause**: Different components or fundamentally different error patterns + - **Low confidence**: If any subagent reports `confidence_level` < 3, flag for manual review + +5. **Filter cascades**: + - **Keep**: Cascades where all releases show the same root cause + - **Discard**: Cascades where releases show different root causes (false positive) + - **Flag**: Cascades with low confidence or incomplete analysis data + +### Step 3: Parse and Present Results + +The script automatically: +1. Auto-detects the current development release (or uses `--current-release`) +2. Calculates lookback releases based on `--lookback` parameter +3. Fetches regression data for all releases using the `list-regressions` skill +4. Matches regressions by test name across releases +5. Outputs the list of potential cascades in JSON format (before similarity analysis) + +**After the script completes**, you (the Claude agent) must: +1. Read the script's JSON output to identify potential cascades +2. For each cascade, follow Step 2 to launch parallel subagents and analyze failures +3. 
Compare the ANALYSIS_RESULT blocks from each subagent +4. Determine which cascades are confirmed (same root cause) vs false positives (different root causes) +5. Generate the final report with the requested format: + - **HTML report** (`--format html`, default): Self-contained HTML with collapsible sections, GitHub-style dark theme + - **Markdown report** (`--format markdown`): Human-readable report with severity levels, timelines, and actionable recommendations + - **JSON output** (`--format json`): Machine-readable format for automation +6. Include only confirmed cascades in the final report, with similarity analysis details + +### Command-Line Arguments + +All arguments are optional: + +- `--current-release <release>`: Override auto-detection (e.g., `4.22`) +- `--lookback N`: Number of previous releases to scan (default: 4, range: 1-6) +- `--days N`: Time window for cascade detection (default: 30) +- `--exclude-install true|false`: Exclude installation failures (default: true) +- `--exclude-monitor true|false`: Exclude Monitor/invariant tests (default: true) +- `--component <name>`: Filter by component name +- `--min-cascade N`: Minimum cascade count to report (default: 1) +- `--format html|markdown|json`: Output format for the final report (default: html). **Note**: The script always outputs JSON; the agent generates the final report in the requested format. +- `--include-resolved`: Include closed regressions in stable releases (default: false) + +### Error Handling + +The script handles common errors gracefully: + +**No current release detected**: +``` +ERROR: Could not auto-detect current development release +HINT: Use --current-release to manually specify (e.g., --current-release 4.22) +``` + +**API failures**: Retries failed requests and continues with available data + +**No cascades found**: +``` +✅ No cascading regressions detected - system is healthy! 
+``` + +### Exit Codes + +- `0`: Success (cascades found or no cascades) +- `1`: General error +- `2`: Missing dependencies or configuration error +- `3`: Critical cascades detected (3+ releases affected) +- `130`: Interrupted by user (Ctrl+C) + +## Example Output Structure + +### Markdown Example (truncated) + +```markdown +# Backport Regression Detection Report +**Generated**: 2026-03-23 14:30:00 UTC +**Current Release**: 4.22 +**Scanned Releases**: 4.21, 4.20, 4.19, 4.18 +**Time Window**: Last 30 days + +## Summary +- **Total Cascading Regressions**: 3 +- **Critical (3+ releases)**: 1 +- **High (2 releases)**: 1 +- **Medium (1 release)**: 1 +- **Low (untriaged)**: 0 +- **Affected Releases**: 4.21 (3), 4.20 (2), 4.19 (1), 4.18 (1) + +--- + +## 🚨 CRITICAL: kube-apiserver - graceful termination failure + +**Test**: `[sig-api-machinery] kube-apiserver should terminate gracefully within grace period` +**Test ID**: `openshift-tests:2bc0fe9de9a98831c20e569a21d7ded9` +**Component**: kube-apiserver + +### Origin (4.22-main) +- **First Detected**: 2026-01-15 10:30:00Z (68 days ago) +- **JIRA Bug**: [OCPBUGS-75200](https://issues.redhat.com/browse/OCPBUGS-75200) +- **Triage Date**: 2026-01-16 08:00:00Z +- **Status**: OPEN - Assigned to API Server team + +### Cascade Timeline +| Release | First Detected | Days After Origin | Status | Resolved | JIRA Links | +|---------|----------------|-------------------|--------|----------|------------| +| 4.21 | 2026-02-01 14:20:00Z | 17 days | OPEN | - | [OCPBUGS-75200](https://issues.redhat.com/browse/OCPBUGS-75200) | +| 4.20 | 2026-02-10 09:15:00Z | 26 days | ✅ RESOLVED | 2026-02-15 09:30:00Z | [OCPBUGS-75200](https://issues.redhat.com/browse/OCPBUGS-75200) | +| 4.19 | 2026-02-18 11:45:00Z | 34 days | OPEN | - | [OCPBUGS-75200](https://issues.redhat.com/browse/OCPBUGS-75200) | +| 4.18 | 2026-02-25 06:30:00Z | 41 days | OPEN | - | None | + +### 🔴 Recommended Actions +1. **URGENT**: Stop all backports related to OCPBUGS-75200 +2. 
Review recent kube-apiserver changes backported to 4.21, 4.20, 4.19, 4.18 +3. Consider reverting problematic backports in stable branches +4. Link this cascade pattern to OCPBUGS-75200 for team visibility +5. Monitor for further spread to 4.17 + +--- + +## Quick Actions + +### Immediate Stops +The following bugs have active cascade patterns and should have backports halted: +- OCPBUGS-75200 (4 releases affected) +- OCPBUGS-74833 (2 releases affected) + +### Backport Review Needed +Review recent backports (last 30 days) for these components: +- kube-apiserver (1 cascading regression) +- MCO (1 cascading regression) + +### Monitor List +Regressions showing early cascade pattern (1 release): +- OCPBUGS-75401: etcd leader election (4.22 → 4.21) +``` + +### JSON Example (truncated) + +```json +{ + "generated": "2026-03-23T14:30:00Z", + "current_release": "4.22", + "scanned_releases": ["4.21", "4.20", "4.19", "4.18"], + "time_window_days": 30, + "summary": { + "total_cascades": 3, + "severity_counts": { + "CRITICAL": 1, + "HIGH": 1, + "MEDIUM": 1, + "LOW": 0 + }, + "affected_releases": { + "4.21": 3, + "4.20": 2, + "4.19": 1, + "4.18": 1 + } + }, + "cascades": [ + { + "test_id": "openshift-tests:2bc0fe9de9a98831c20e569a21d7ded9", + "test_name": "[sig-api-machinery] kube-apiserver should terminate gracefully within grace period", + "component": "kube-apiserver", + "severity": "CRITICAL", + "origin": { + "release": "4.22", + "opened": "2026-01-15T10:30:00Z", + "triages": [ + { + "jira_key": "OCPBUGS-75200", + "url": "https://issues.redhat.com/browse/OCPBUGS-75200", + "created_at": "2026-01-16T08:00:00Z" + } + ] + }, + "cascade_releases": [ + { + "release": "4.21", + "opened": "2026-02-01T14:20:00Z", + "closed": null, + "days_after_origin": 17, + "status": "open", + "is_resolved": false + }, + { + "release": "4.20", + "opened": "2026-02-10T09:15:00Z", + "closed": "2026-02-15T09:30:00Z", + "days_after_origin": 26, + "status": "closed", + "is_resolved": true + } + ] + } + ] +} 
+``` diff --git a/plugins/ci/skills/detect-potential-cascades/SKILL.md b/plugins/ci/skills/detect-potential-cascades/SKILL.md new file mode 100644 index 000000000..257adeab8 --- /dev/null +++ b/plugins/ci/skills/detect-potential-cascades/SKILL.md @@ -0,0 +1,356 @@ +--- +name: Detect Potential Cascades +description: Identify regressions that appear across multiple releases by matching test names and checking temporal ordering +--- + +# Detect Potential Cascades + +This skill identifies regressions that have potentially cascaded backward from the current development release to older stable releases due to problematic backports. It performs test name matching across releases and checks temporal ordering to find potential cascades. + +## When to Use This Skill + +Use this skill when you need to: + +- Identify regressions that started in a development branch and later appeared in older releases +- Find tests that are failing across multiple releases (test name matching only) +- Generate a list of potential cascade candidates for further similarity analysis +- Get initial cascade detection results quickly without deep root cause analysis + +## Prerequisites + +1. **Python 3.6+**: Required to run the helper skills +2. **Network Access**: Must be able to reach Sippy API endpoints +3. 
**Installed skills**: + - `ci:fetch-releases` - Auto-detect current development release + - `teams:list-regressions` - Fetch regression data for each release + +## Implementation Steps + +### Step 1: Determine Release Scope + +**Auto-detect current development release** or use provided `--current-release`: + +```bash +# Auto-detect latest release +current_release=$(python3 plugins/ci/skills/fetch-releases/fetch_releases.py --latest) +echo "Current development release: $current_release" +``` + +**Calculate lookback releases** from the current release: + +```python +# Example: If current is 4.22 and lookback is 4 +# Previous releases: 4.21, 4.20, 4.19, 4.18 +major, minor = 4, 22 +lookback_releases = [f"{major}.{minor - i}" for i in range(1, 5)] +# Result: ["4.21", "4.20", "4.19", "4.18"] +``` + +### Step 2: Fetch Regression Data for All Releases + +Use `teams:list-regressions` to fetch regression data for the development release and all lookback releases. **Run these in parallel** for better performance. + +**Calculate start date** if `--days` parameter is provided: + +```bash +# For --days 45 (GNU date syntax; on macOS/BSD use: date -v-45d +%Y-%m-%d) +start_date=$(date -d '45 days ago' +%Y-%m-%d) +``` + +**Fetch development release regressions** (include both open and closed): + +```bash +python3 plugins/teams/skills/list-regressions/list_regressions.py \ + --release "$current_release" \ + --start "$start_date" \ + > .work/detect-backport-regressions/${current_release}.json +``` + +**Fetch older release regressions** (in parallel): + +```bash +for release in 4.21 4.20 4.19 4.18; do + python3 plugins/teams/skills/list-regressions/list_regressions.py \ + --release "$release" \ + --start "$start_date" \ + > .work/detect-backport-regressions/${release}.json & +done +wait +``` + +If `--component` is specified, add `--components "$component_name"` to the commands. 
+ +### Step 3: Build Development Release Regression Map + +Parse the development release regression data and build a map keyed by test name: + +```python +dev_regressions = {} # test_name -> regression info + +# Process both open and closed regressions in dev release +for component_name, component_data in dev_data["components"].items(): + for status in ["open", "closed"]: + for regression in component_data[status]: + test_name = regression["test_name"] + + # Filter: Exclude install failures if --exclude-install is true + if exclude_install and ( + test_name.startswith("install should succeed") or + regression["component"] == "cluster install" + ): + continue + + # Filter: Exclude Monitor/invariant tests (default: true) + # Monitor tests are test framework invariant checks, not functional tests + # They are prone to false positives as they fail for many different reasons + if exclude_monitor and "Monitor:" in test_name: + continue + + # Store earliest occurrence of this test in dev release + if test_name not in dev_regressions: + dev_regressions[test_name] = { + "release": current_release, + "test_name": test_name, + "component": regression["component"], + "capability": regression["capability"], + "opened": regression["opened"], # ISO timestamp + "triages": regression["triages"], + "variants": regression["variants"], + "regression_id": regression["id"] + } + else: + # If test appears multiple times, keep earliest opened timestamp + # and merge triages from all occurrences + if parse_timestamp(regression["opened"]) < parse_timestamp(dev_regressions[test_name]["opened"]): + dev_regressions[test_name]["opened"] = regression["opened"] + dev_regressions[test_name]["regression_id"] = regression["id"] + + # Merge triages + for triage in regression["triages"]: + if triage not in dev_regressions[test_name]["triages"]: + dev_regressions[test_name]["triages"].append(triage) +``` + +### Step 4: Scan Older Releases for Matching Tests + +For each older release, look for 
regressions with matching test names: + +```python +cascades = {} # test_name -> cascade info + +for older_release in lookback_releases: + # Parse regression data for this release + older_data = load_json(f".work/detect-backport-regressions/{older_release}.json") + + # Check open regressions (and closed if --include-resolved is set) + statuses_to_check = ["open", "closed"] if include_resolved else ["open"] + + for component_name, component_data in older_data["components"].items(): + for status in statuses_to_check: + for regression in component_data[status]: + test_name = regression["test_name"] + + # Skip if this test doesn't exist in dev release + if test_name not in dev_regressions: + continue + + # Filter: Exclude Monitor/invariant tests (default: true) + if exclude_monitor and "Monitor:" in test_name: + continue + + # Check temporal ordering: older release AFTER dev release? + dev_opened = parse_timestamp(dev_regressions[test_name]["opened"]) + older_opened = parse_timestamp(regression["opened"]) + + if older_opened <= dev_opened: + # Regression in older release appeared first or same time + # This is NOT a cascade - skip it + continue + + # Calculate time difference + days_after_origin = (older_opened - dev_opened).days + + # Check if within time window + if days_after_origin > days_window: + continue + + # This is a potential cascade! 
+ if test_name not in cascades: + cascades[test_name] = { + "origin": dev_regressions[test_name], + "cascade_releases": [] + } + + # Add or update cascade release entry + existing = find_cascade_release(cascades[test_name], older_release) + if existing: + # Keep earliest timestamp for this release + if older_opened < parse_timestamp(existing["opened"]): + existing["opened"] = regression["opened"] + existing["days_after_origin"] = days_after_origin + # Merge triages + merge_triages(existing, regression["triages"]) + else: + cascades[test_name]["cascade_releases"].append({ + "release": older_release, + "opened": regression["opened"], + "closed": regression.get("closed"), + "days_after_origin": days_after_origin, + "triages": regression["triages"], + "status": status, + "is_resolved": (status == "closed"), + "regression_id": regression["id"] + }) +``` + +### Step 5: Calculate Severity and Filter + +**Severity levels** based on cascade extent and triage status: + +```python +def calculate_severity(num_cascade_releases, has_triage): + if has_triage and num_cascade_releases >= 3: + return "CRITICAL" # Triaged + 3+ releases + elif has_triage and num_cascade_releases >= 2: + return "HIGH" # Triaged + 2 releases + elif has_triage and num_cascade_releases >= 1: + return "MEDIUM" # Triaged + 1 release + else: + return "LOW" # No triage +``` + +**Filter by minimum cascade count** (if `--min-cascade` is specified): + +```python +filtered_cascades = { + test_name: info + for test_name, info in cascades.items() + if len(info["cascade_releases"]) >= min_cascade +} +``` + +### Step 6: Generate Output JSON + +Create structured JSON output with all potential cascades: + +```json +{ + "generated": "2026-04-02T21:09:32Z", + "current_release": "4.22", + "scanned_releases": ["4.21", "4.20", "4.19", "4.18"], + "time_window_days": 45, + "analysis_type": "test_name_matching", + "similarity_analyzed": false, + "potential_cascades": [ + { + "test_name": "[sig-arch][Feature:ClusterUpgrade] 
Cluster should remain functional during upgrade", + "severity": "CRITICAL", + "origin": { + "release": "4.22", + "test_name": "...", + "component": "Cluster Version Operator", + "capability": "ClusterUpgrade", + "opened": "2026-02-25T20:03:31.204512Z", + "triages": [...], + "variants": [...], + "regression_id": 35926 + }, + "cascade_releases": [ + { + "release": "4.21", + "opened": "2026-03-25T08:05:05.990192Z", + "closed": null, + "days_after_origin": 27, + "triages": [], + "status": "open", + "is_resolved": false, + "regression_id": 36991 + }, + { + "release": "4.20", + "opened": "2026-03-31T00:05:27.771673Z", + "closed": null, + "days_after_origin": 33, + "triages": [], + "status": "open", + "is_resolved": false, + "regression_id": 37385 + } + ], + "regression_url": "https://sippy-auth.dptools.openshift.org/sippy-ng/component_readiness/regressions/35926", + "needs_similarity_analysis": true + } + ] +} +``` + +**Save to file**: + +```bash +cat > .work/detect-backport-regressions/potential_cascades.json <<< "$output_json" +``` + +## Output Format + +The skill outputs JSON to stdout and saves it to `.work/detect-backport-regressions/potential_cascades.json`. 
+ +**Key fields**: +- `analysis_type`: Always "test_name_matching" for this skill +- `similarity_analyzed`: Always false (similarity analysis is done by separate skill) +- `potential_cascades`: Array of cascade objects +- `needs_similarity_analysis`: Always true for each cascade + +## Parameters + +These parameters should be passed from the command that invokes this skill: + +- `--current-release `: Override auto-detection (e.g., "4.22") +- `--lookback N`: Number of previous releases to scan (default: 4) +- `--days N`: Time window in days for cascade detection (default: 30) +- `--exclude-install true|false`: Exclude installation failures (default: true) +- `--exclude-monitor true|false`: Exclude Monitor/invariant tests (default: true) + - Monitor tests are test framework invariant checks like `[Monitor:pod-network-availability]` + - These tests are prone to false positives as they fail for many different reasons + - Excluding them significantly reduces noise and focuses on functional test failures +- `--component `: Filter by component name +- `--min-cascade N`: Minimum cascade count to report (default: 1) +- `--include-resolved`: Include closed regressions in stable releases (default: false) + +## Exit Codes + +- `0`: Success (cascades found or no cascades) +- `1`: General error +- `2`: Missing dependencies or configuration error +- `3`: Critical cascades detected (3+ releases affected) +- `130`: Interrupted by user (Ctrl+C) + +## Example Usage + +This skill is typically invoked by the `detect-backport-regressions` command, but can also be used standalone: + +```bash +# Basic usage +# Agent reads this skill and executes the steps above + +# With parameters +# --current-release 4.22 --lookback 4 --days 45 --min-cascade 2 +``` + +## Notes + +- This skill performs **test name matching only** - it does NOT analyze root causes +- All potential cascades have `needs_similarity_analysis: true` +- Use `analyze-cascade-similarity` skill to confirm which cascades are real +- 
The skill creates working directory `.work/detect-backport-regressions/` if it doesn't exist +- Regression data files are cached in `.work/detect-backport-regressions/{release}.json` +- **Monitor test exclusion** (enabled by default) significantly reduces false positives: + - Reduces cascade count from ~25 to ~6 typical cases + - Focuses on functional tests rather than invariant checks + - Monitor tests fail for many platform-specific and transient reasons + +## See Also + +- `analyze-cascade-similarity` - Perform root cause analysis on potential cascades +- `generate-cascade-report` - Generate HTML/Markdown reports from confirmed cascades +- `ci:fetch-releases` - Get available OpenShift releases +- `teams:list-regressions` - Fetch regression data for a release diff --git a/plugins/ci/skills/generate-cascade-report/SKILL.md b/plugins/ci/skills/generate-cascade-report/SKILL.md new file mode 100644 index 000000000..b461165b8 --- /dev/null +++ b/plugins/ci/skills/generate-cascade-report/SKILL.md @@ -0,0 +1,404 @@ +--- +name: Generate Cascade Report +description: Generate interactive HTML, Markdown, or JSON reports from confirmed cascade data with similarity analysis results +--- + +# Generate Cascade Report + +This skill generates final reports from confirmed cascade data, including similarity analysis results. It supports multiple output formats: interactive HTML, Markdown, and JSON. + +## When to Use This Skill + +Use this skill when you need to: + +- Generate a final report of confirmed backport cascades +- Create an interactive HTML report for web browser viewing +- Export cascade data in Markdown or JSON format +- Present cascade analysis results to stakeholders + +## Prerequisites + +1. **Input file**: `.work/detect-backport-regressions/confirmed_cascades.json` from `analyze-cascade-similarity` skill +2. 
**Output format** specified via `--format` parameter (html, markdown, or json) + +## Implementation Steps + +### Step 1: Load Confirmed Cascades + +Read the JSON output from the `analyze-cascade-similarity` skill: + +```bash +confirmed_cascades=$(cat .work/detect-backport-regressions/confirmed_cascades.json) +``` + +Parse the data: + +```python +import json +data = json.loads(confirmed_cascades) +confirmed = data["confirmed_cascades"] +false_positives = data["false_positives"] +unable_to_analyze = data["unable_to_analyze"] +``` + +### Step 2: Determine Output Format + +Check the `--format` parameter (default: html): + +```python +format = args.format # "html", "markdown", or "json" + +if format == "html": + generate_html_report(data) +elif format == "markdown": + generate_markdown_report(data) +elif format == "json": + # Pass through the JSON data + print(json.dumps(data, indent=2)) +``` + +### Step 3: Generate HTML Report (Default) + +**HTML Report Features**: +- Self-contained (all CSS/JS embedded) +- Interactive collapsible sections +- Color-coded severity levels +- GitHub-style dark theme +- Similarity analysis visualization +- Direct links to Sippy and JIRA + +**HTML Structure**: + +```html + + + + + Backport Regression Detection Report + + + +
+ +
+

Backport Regression Detection Report

+
+ Generated: {timestamp} + Current Release: {current_release} + Scanned Releases: {releases} +
+
+ + +
+
+
Critical
+
{count}
+
+ +
+ + +

Confirmed Cascades

+ {for each confirmed cascade} +
+
+ {severity} +
{test_name}
+
+ Similarity: {similarity_score * 100}% +
+
+
+ +
+

Origin ({origin.release})

+
+ Root Cause: {root_cause_summary} + Affected Components: {affected_components} + Confidence: {confidence_level}/5 +
+
+ + +
+

Cascade Timeline

+ + + + + + + + {for each cascade_release} + + + + + + +
ReleaseRoot CauseSimilarityStatus
{release}{root_cause_summary} + + {similarity_to_origin * 100}% + {if similarity >= 0.6: ✓} + + {status}
+
+ + +
+

Similarity Analysis

+
+ +
+
+
+
+ + +

False Positives (Different Root Causes)

+ {for each false_positive} +
+ +
+ Why this is a false positive: + Origin: {origin.root_cause_summary} + 4.21: {release_4.21.root_cause_summary} + 4.20: {release_4.20.root_cause_summary} + → Different components and error patterns +
+
+
+ + + + +``` + +**Save HTML to file**: + +```bash +timestamp=$(date +%Y%m%d_%H%M%S) +output_file="backport-regression-report_${timestamp}.html" +echo "$html_content" > "$output_file" +echo "Report saved to: $output_file" +``` + +### Step 4: Generate Markdown Report + +**Markdown Report Structure**: + +```markdown +# Backport Regression Detection Report + +**Generated**: 2026-04-02 22:30:00 UTC +**Current Release**: 4.22 +**Scanned Releases**: 4.21, 4.20, 4.19, 4.18 +**Analysis Type**: Root Cause Comparison with Similarity Analysis + +## Executive Summary + +- **Total Confirmed Cascades**: 3 +- **Critical** (3+ releases, triaged): 1 +- **High** (2 releases, triaged): 1 +- **Medium** (1 release, triaged): 1 +- **False Positives Detected**: 2 + +--- + +## Confirmed Cascades + +### 🚨 CRITICAL: etcd data loss during upgrade + +**Test**: `[sig-etcd] etcd should not lose quorum during upgrade` +**Severity**: CRITICAL +**Similarity Score**: 85% +**Component**: etcd, storage + +#### Origin (4.22) +- **First Detected**: 2026-02-15 10:30:00Z +- **Root Cause**: etcd data corruption due to fsync failures on XFS filesystem +- **Affected Components**: etcd, storage, machine-config +- **Key Error Patterns**: `failed to fsync`, `etcd data corruption`, `leveldb: corrupted` +- **JIRA**: [OCPBUGS-75000](https://issues.redhat.com/browse/OCPBUGS-75000) +- **Confidence**: 5/5 + +#### Cascade Timeline + +| Release | First Detected | Days After | Root Cause | Similarity | Status | +|---------|----------------|------------|------------|------------|--------| +| **4.21** | 2026-03-10 | 23 days | etcd data corruption due to fsync failures | ✓ 85% | OPEN | +| **4.20** | 2026-03-15 | 28 days | etcd data corruption due to fsync failures | ✓ 82% | OPEN | +| **4.19** | 2026-03-18 | 31 days | etcd data corruption due to fsync failures | ✓ 88% | OPEN | + +**Analysis**: All releases show the **same root cause** - etcd fsync failures on XFS filesystems. 
This is a confirmed backport cascade likely caused by a machine-config or storage layer change that was backported from 4.22 to older releases. + +**Recommended Action**: +1. Halt all backports to etcd, machine-config, and storage components +2. Review recent backports to identify the problematic change +3. File blocker bugs for all affected releases +4. Link to origin JIRA: OCPBUGS-75000 + +--- + +### ⚠️ FALSE POSITIVE: Cluster upgrade test failures + +**Test**: `[sig-arch][Feature:ClusterUpgrade] Cluster should remain functional during upgrade` +**Severity**: CRITICAL (by test name matching) +**Similarity Score**: 15% +**Why this is a false positive**: Different root causes across releases + +#### Origin (4.22) +- **Root Cause**: Local registry ImagePullBackOff on bare metal (dev-scripts specific) +- **Components**: local-image-registry, worker-node-image-pull +- **Confidence**: 4/5 + +#### 4.21 - Different Root Cause ✗ +- **Root Cause**: CNI configuration loss during upgrade causing NetworkPluginNotReady +- **Components**: network (OVN-Kubernetes), machine-config, etcd +- **Similarity to Origin**: 10% +- **Confidence**: 5/5 + +#### 4.20 - Different Root Cause ✗ +- **Root Cause**: External quay.io registry HTTP 502 errors (infrastructure outage) +- **Components**: External/quay.io, Test Framework +- **Similarity to Origin**: 20% +- **Confidence**: 5/5 + +**Analysis**: While the **same test** failed across all three releases, the **root causes are completely different**: +- 4.22: Bare metal local registry issue +- 4.21: OVN-Kubernetes CNI configuration bug (REAL PRODUCT BUG) +- 4.20: External registry outage (NOT ACTIONABLE) + +This is NOT a backport cascade. These are independent failures. + +**Recommended Action**: +1. Remove from cascade report - false positive +2. Escalate 4.21 CNI issue separately as critical networking bug +3. 4.22 bare metal registry issue needs investigation +4. 
4.20 was transient external issue - no action needed + +--- + +## Summary + +### Confirmed True Cascades: 3 +1. etcd data loss (CRITICAL) - 3 releases affected +2. kube-apiserver graceful termination (HIGH) - 2 releases +3. DNS operator pod scheduling (MEDIUM) - 1 release + +### False Positives: 2 +1. Cluster upgrade test (different root causes) +2. Network pod creation (different infrastructure issues) + +### Unable to Analyze: 1 +1. Storage provisioning test (no Prow job URLs available) +``` + +**Output markdown**: + +```bash +# Output to stdout (can be redirected to file) +echo "$markdown_content" +``` + +### Step 5: JSON Output + +For JSON format, simply pass through the confirmed_cascades.json data with proper formatting: + +```bash +cat .work/detect-backport-regressions/confirmed_cascades.json | jq '.' +``` + +Or add additional metadata: + +```json +{ + "report_generated": "2026-04-02T22:30:00Z", + "report_format": "json", + "summary": { + "total_confirmed": 3, + "total_false_positives": 2, + "by_severity": { + "CRITICAL": 1, + "HIGH": 1, + "MEDIUM": 1 + } + }, + "confirmed_cascades": [ /* from input */ ], + "false_positives": [ /* from input */ ], + "unable_to_analyze": [ /* from input */ ] +} +``` + +## Output Formats + +### HTML (Default) +- **File**: `backport-regression-report_YYYYMMDD_HHMMSS.html` +- **Location**: Current working directory +- **Features**: Interactive, self-contained, no external dependencies + +### Markdown +- **Output**: stdout +- **Usage**: Can be redirected to file or piped to other tools +- **Features**: Human-readable, GitHub-compatible + +### JSON +- **Output**: stdout +- **Usage**: For automation, CI/CD integration, data processing +- **Features**: Machine-readable, structured data + +## Severity Color Coding + +- **CRITICAL**: Red (#f85149) - 3+ releases, triaged +- **HIGH**: Orange (#d29922) - 2 releases, triaged +- **MEDIUM**: Yellow (#d4a72c) - 1 release, triaged +- **LOW**: Blue (#58a6ff) - Untriaged + +## Similarity 
Visualization + +**Checkmarks in reports**: +- ✓ (green checkmark): Similarity >= 60% (confirmed same root cause) +- ✗ (red X): Similarity < 60% (different root cause) + +**Similarity badges**: +- 80-100%: "Excellent match" +- 60-79%: "Good match" +- 40-59%: "Moderate match" +- 0-39%: "Poor match" + +## Example Usage + +This skill is typically invoked after `analyze-cascade-similarity`: + +```bash +# Generate HTML report (default) +# Skill: generate-cascade-report with --format html + +# Generate Markdown report to stdout +# Skill: generate-cascade-report with --format markdown + +# Generate JSON for automation +# Skill: generate-cascade-report with --format json > output.json +``` + +## See Also + +- `detect-potential-cascades` - Identify potential cascades by test name matching +- `analyze-cascade-similarity` - Perform root cause analysis and similarity comparison diff --git a/plugins/teams/.claude-plugin/plugin.json b/plugins/teams/.claude-plugin/plugin.json index 3e3392fb4..42f383712 100644 --- a/plugins/teams/.claude-plugin/plugin.json +++ b/plugins/teams/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "teams", "description": "Team structure knowledge and health analysis commands for OpenShift teams", - "version": "0.0.13", + "version": "0.0.14", "author": { "name": "github.com/openshift-eng" } diff --git a/plugins/teams/skills/list-regressions/list_regressions.py b/plugins/teams/skills/list-regressions/list_regressions.py index 716ff1342..90c5f5dd3 100755 --- a/plugins/teams/skills/list-regressions/list_regressions.py +++ b/plugins/teams/skills/list-regressions/list_regressions.py @@ -291,7 +291,7 @@ def remove_unnecessary_fields(regressions: list) -> list: """ Remove unnecessary fields from regressions to reduce response size. - Removes 'links' and 'test_id' fields from each regression object. + Removes 'links' field from each regression object. 
Args: regressions: List of regression dictionaries @@ -300,9 +300,8 @@ def remove_unnecessary_fields(regressions: list) -> list: List of regression dictionaries with unnecessary fields removed """ for regression in regressions: - # Remove links and test_id to reduce response size + # Remove links to reduce response size regression.pop('links', None) - regression.pop('test_id', None) return regressions