diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index faeb50b1b..b23fad942 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -37,18 +37,6 @@ "description": "Team structure knowledge and health analysis commands for OpenShift teams", "version": "0.0.14" }, - { - "name": "doc", - "source": "./plugins/doc", - "description": "A plugin for engineering documentation and notes", - "version": "0.0.2" - }, - { - "name": "session", - "source": "./plugins/session", - "description": "A plugin for Claude session management and persistence", - "version": "0.0.2" - }, { "name": "snowflake", "source": "./plugins/snowflake", @@ -65,7 +53,7 @@ "name": "utils", "source": "./plugins/utils", "description": "A generic utilities plugin serving as a catch-all for various helper commands", - "version": "0.0.9" + "version": "0.0.10" }, { "name": "olm", @@ -109,30 +97,12 @@ "description": "Etcd cluster health monitoring and performance analysis utilities", "version": "0.0.2" }, - { - "name": "yaml", - "source": "./plugins/yaml", - "description": "YAML documentation and utilities", - "version": "0.0.2" - }, { "name": "must-gather", "source": "./plugins/must-gather", "description": "A plugin to analyze and report on must-gather data", "version": "0.0.2" }, - { - "name": "lvms", - "source": "./plugins/lvms", - "description": "LVMS (Logical Volume Manager Storage) plugin for troubleshooting and debugging storage issues", - "version": "0.1.1" - }, - { - "name": "native-notifications", - "source": "./plugins/native-notifications", - "description": "Cross-platform desktop notifications for ai-helpers", - "version": "0.0.2" - }, { "name": "hcp", "source": "./plugins/hcp", @@ -145,36 +115,12 @@ "description": "Security compliance and vulnerability analysis tools for Go projects", "version": "0.0.3" }, - { - "name": "test-coverage", - "source": "./plugins/test-coverage", - "description": "Analyze test coverage and identify gaps in test 
scenarios", - "version": "0.0.2" - }, { "name": "node-tuning", "source": "./plugins/node-tuning", "description": "Generate and analyze OpenShift node tuning profiles", "version": "1.0.0" }, - { - "name": "testing", - "source": "./plugins/testing", - "description": "Comprehensive testing utilities for operators and applications", - "version": "0.1.0" - }, - { - "name": "origin", - "source": "./plugins/origin", - "description": "Helpers for openshift/origin development.", - "version": "0.0.2" - }, - { - "name": "container-image", - "source": "./plugins/container-image", - "description": "Container image inspection and analysis using skopeo and podman", - "version": "0.0.2" - }, { "name": "node", "source": "./plugins/node", @@ -193,12 +139,6 @@ "description": "Manage isolated git worktree workspaces for multi-repo development", "version": "1.0.1" }, - { - "name": "gwapi", - "source": "./plugins/gwapi", - "description": "Gateway API installation utilities for Kubernetes/OpenShift clusters", - "version": "0.0.1" - }, { "name": "code-review", "source": "./plugins/code-review", diff --git a/.pruneprotect b/.pruneprotect index a20e5ecff..cac6a4c4c 100644 --- a/.pruneprotect +++ b/.pruneprotect @@ -4,9 +4,5 @@ # Canonical example plugin plugins/hello-world/ -# Infrastructure plugins (hook-only, by design) -plugins/metrics/ -plugins/native-notifications/ - # Marketplace operations plugin plugins/marketplace-ops/ diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index 2799490d6..000000000 --- a/GEMINI.md +++ /dev/null @@ -1,9 +0,0 @@ -# AI Helpers - Gemini CLI Extension - -This extension provides commands and skills for various development tasks. -Commands are organized by topic using directory-based namespacing -(e.g. `/ci:analyze-payload`, `/git:commit-suggest`, `/jira:create`). 
- -## Available command groups - -- **hello-world**: A hello world plugin diff --git a/PLUGINS.md b/PLUGINS.md index 96c6d4067..325203a6d 100644 --- a/PLUGINS.md +++ b/PLUGINS.md @@ -8,15 +8,11 @@ This document lists all available Claude Code plugins and their commands in the - [Ci](#ci-plugin) - [Code Review](#code-review-plugin) - [Compliance](#compliance-plugin) -- [Container Image](#container-image-plugin) -- [Doc](#doc-plugin) - [Etcd](#etcd-plugin) - [Git](#git-plugin) -- [Gwapi](#gwapi-plugin) - [Hcp](#hcp-plugin) - [Hello World](#hello-world-plugin) - [Jira](#jira-plugin) -- [Lvms](#lvms-plugin) - [Marketplace Ops](#marketplace-ops-plugin) - [Must Gather](#must-gather-plugin) - [Node](#node-plugin) @@ -25,18 +21,13 @@ This document lists all available Claude Code plugins and their commands in the - [Olm Team](#olm-team-plugin) - [Openshift](#openshift-plugin) - [Openshift Tls Profile](#openshift-tls-profile-plugin) -- [Origin](#origin-plugin) - [Ote Migration](#ote-migration-plugin) - [Rds Analyzer](#rds-analyzer-plugin) -- [Session](#session-plugin) - [Snowflake](#snowflake-plugin) - [Sosreport](#sosreport-plugin) - [Teams](#teams-plugin) -- [Test Coverage](#test-coverage-plugin) -- [Testing](#testing-plugin) - [Utils](#utils-plugin) - [Workspaces](#workspaces-plugin) -- [Yaml](#yaml-plugin) ### Agendas Plugin @@ -115,26 +106,6 @@ Security compliance and vulnerability analysis tools for Go projects See [plugins/compliance/README.md](plugins/compliance/README.md) for detailed documentation. 
-### Container Image Plugin - -Container image inspection and analysis using skopeo and podman - -**Commands:** -- **`/container-image:compare` ` `** - Compare two container images to identify differences -- **`/container-image:inspect` ``** - Inspect and provide detailed breakdown of a container image -- **`/container-image:tags` ``** - List and analyze available tags for a container image repository - -See [plugins/container-image/README.md](plugins/container-image/README.md) for detailed documentation. - -### Doc Plugin - -A plugin for engineering documentation and notes - -**Commands:** -- **`/doc:note` `[task description]`** - Generate professional engineering notes and append them to a log file - -See [plugins/doc/README.md](plugins/doc/README.md) for detailed documentation. - ### Etcd Plugin Etcd cluster health monitoring and performance analysis utilities @@ -163,17 +134,6 @@ Git workflow automation and utilities See [plugins/git/README.md](plugins/git/README.md) for detailed documentation. -### Gwapi Plugin - -Gateway API management for Kubernetes/OpenShift clusters - -**Commands:** -- **`/gwapi:check` `[namespace]`** - Check Gateway API resources status in the cluster -- **`/gwapi:delete` `[namespace]`** - Delete Gateway API resources from a Kubernetes/OpenShift cluster -- **`/gwapi:install` `[namespace]`** - Install Gateway API resources to a Kubernetes/OpenShift cluster - -See [plugins/gwapi/README.md](plugins/gwapi/README.md) for detailed documentation. - ### Hcp Plugin Generate HyperShift cluster creation commands via hcp CLI from natural language descriptions @@ -219,15 +179,6 @@ A plugin to automate tasks with Jira See [plugins/jira/README.md](plugins/jira/README.md) for detailed documentation. 
-### Lvms Plugin - -LVMS (Logical Volume Manager Storage) plugin for troubleshooting and debugging storage issues - -**Commands:** -- **`/lvms:analyze` `[must-gather-path|--live] [--component storage|operator|volumes]`** - Comprehensive LVMS troubleshooting - analyzes LVMCluster, volume groups, PVCs, and storage issues on live clusters or must-gather - -See [plugins/lvms/README.md](plugins/lvms/README.md) for detailed documentation. - ### Marketplace Ops Plugin Maintenance commands for Claude Code plugin marketplaces @@ -332,15 +283,6 @@ Implementation requirements and details for OpenShift TLS security profiles See [plugins/openshift-tls-profile/README.md](plugins/openshift-tls-profile/README.md) for detailed documentation. -### Origin Plugin - -Helpers for openshift/origin development. - -**Commands:** -- **`/origin:two-node-origin-pr-helper` `[--url PR_URL] [] [--depth quick|full]`** - Expert review tool for PRs that add or modify Two Node (Fencing or Arbiter) tests under test/extended/two_node/ in openshift/origin. - -See [plugins/origin/README.md](plugins/origin/README.md) for detailed documentation. - ### Ote Migration Plugin Automate OpenShift Tests Extension (OTE) migration for component repositories @@ -359,15 +301,6 @@ Reference Design Specification (RDS) Analyzer workflow: cluster-compare JSON to See [plugins/rds-analyzer/README.md](plugins/rds-analyzer/README.md) for detailed documentation. -### Session Plugin - -A plugin to save and resume conversation sessions across long time intervals - -**Commands:** -- **`/session:save-session` `[optional-description]`** - Save current conversation session to markdown file for future continuation - -See [plugins/session/README.md](plugins/session/README.md) for detailed documentation. 
- ### Snowflake Plugin Snowflake data analysis commands for engineering metrics and reports @@ -405,25 +338,6 @@ Team structure knowledge and health analysis commands for OpenShift teams See [plugins/teams/README.md](plugins/teams/README.md) for detailed documentation. -### Test Coverage Plugin - -Analyze code coverage and identify untested paths - -**Commands:** -- **`/test-coverage:analyze` ` [--output ] [--priority ] [--test-structure-only]`** - Analyze test code structure without running tests to identify coverage gaps -- **`/test-coverage:gaps` ` [--output ]`** - Identify E2E test scenario gaps in OpenShift/Kubernetes tests (component-agnostic) - -See [plugins/test-coverage/README.md](plugins/test-coverage/README.md) for detailed documentation. - -### Testing Plugin - -Comprehensive testing utilities for operators and applications - -**Commands:** -- **`/testing:mutation-test` `[operator-path] [--controllers ] [--mutation-types ] [--report-format ]`** - Test operator controller quality through mutation testing - validates test suite catches code mutations - -See [plugins/testing/README.md](plugins/testing/README.md) for detailed documentation. 
- ### Utils Plugin A generic utilities plugin serving as a catch-all for various helper commands and agents @@ -434,7 +348,6 @@ A generic utilities plugin serving as a catch-all for various helper commands an - **`/utils:find-konflux-images` ``** - Find and verify Konflux-built container images from a GitHub PR - **`/utils:generate-test-plan` `[GitHub PR URLs]`** - Generate test steps for one or more related PRs - **`/utils:gh-attention` `[--repo ]`** - List PRs and issues requiring your attention -- **`/utils:placeholder`** - Placeholder command for the utils plugin - **`/utils:process-renovate-pr` ` [JIRA_PROJECT] [COMPONENT]`** - Process Renovate dependency PR(s) to meet repository contribution standards - **`/utils:review-ai-helpers-overlap` `[--idea TEXT] [--pr NUMBER] [--verbose]`** - Review potential overlaps with existing ai-helpers (Claude Code Plugins, Commands, Skills, Sub-agents, or Hooks) and open PRs - **`/utils:review-security` `[file-paths-or-patterns]`** - Orchestrate security scanners and provide contextual triage of findings @@ -450,12 +363,3 @@ Manage isolated git worktree workspaces for multi-repo development - **`/workspaces:delete` ``** - Delete a workspace and its git worktrees See [plugins/workspaces/README.md](plugins/workspaces/README.md) for detailed documentation. - -### Yaml Plugin - -Generate comprehensive YAML documentation from Go struct definitions with sensible default values - -**Commands:** -- **`/yaml:docs` `[file:StructName] [output.md]`** - Generate comprehensive YAML documentation from Go struct definitions with sensible default values - -See [plugins/yaml/README.md](plugins/yaml/README.md) for detailed documentation. diff --git a/README.md b/README.md index 29e47391b..e082f21c1 100644 --- a/README.md +++ b/README.md @@ -56,20 +56,29 @@ Add a SessionStart hook to automatically sync the marketplace catalog on each se **Note:** This only refreshes the catalog (what's available). 
To actually update an installed plugin to a newer version, you still need to reinstall it with `/plugin install @ai-helpers`. -### Using Cursor +### Other Tools -Cursor is able to find the various commands defined in this repo by -making it available inside your `~/.cursor/commands` directory. +Coding agents such as OpenCode, Gemini, and Cursor can consume Claude Code +plugins using the [Agent Package Manager (APM)](https://github.com/microsoft/apm). +Example `apm.yml`: + +```yaml +name: my-project +version: 1.0.0 +description: My project is great. +target: [claude, cursor, gemini, opencode] + +dependencies: + - openshift-eng/ai-helpers/plugins/bigquery ``` -$ mkdir -p ~/.cursor/commands -$ git clone git@github.com:openshift-eng/ai-helpers.git -$ ln -s ai-helpers ~/.cursor/commands/ai-helpers -``` -## Using the Docker Container +Then run `apm install`. It can install into your project only, or globally with the `--global` flag. + +## Using the Container -A container is available with Claude Code and all plugins pre-installed. +A container is available with Claude Code and the marketplace already +set up. This is primarily for use in OpenShift CI. ### Building the Container diff --git a/agents/feedback.md b/agents/feedback.md deleted file mode 100644 index fea506101..000000000 --- a/agents/feedback.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -name: feedback -description: | - Use this agent when you need expert feedback on your plans, code changes, reviews, or problem-solving approach. This agent should be used proactively during development work to validate your thinking and discover blind spots. Examples: Context: User is working on a complex refactoring task and has outlined their approach. user: 'I am planning to refactor the authentication system by moving from JWT to session-based auth. Here is my plan: [detailed plan]' assistant: 'Let me use the gemini-consultant agent to get expert feedback on this refactoring plan before we proceed.'
Since the user has outlined a significant architectural change, use the gemini-consultant agent to validate the approach and identify potential issues. Context: User has implemented a new feature and wants to ensure it is robust. user: 'I have implemented the new caching layer. Here is what I did: [implementation details]' assistant: 'Now let me consult with gemini to review this implementation and see if there are any improvements or issues I should address.' After completing implementation work, use the gemini-consultant agent to get expert review and suggestions for improvement. -color: green ---- -You are a specialized agent that consults with gemini, an external AI with superior critical thinking and reasoning capabilities. Your role is to present codebase-specific context and implementation details to gemini for expert review, then integrate its critical analysis back into actionable recommendations. You have the codebase knowledge; gemini provides the deep analytical expertise to identify flaws, blind spots, and better approaches. - -## Requirements - -- Gemini CLI must be installed (`gemini`) and functional (correctly authenticated with an account) - -## Core Process: - -Formulate Query: - - Clearly articulate the problem, plan, or implementation with sufficient context - - Include specific file paths and line numbers rather than code snippets (gemini has codebase access) - - Frame specific questions that combine your codebase knowledge with requests for gemini's critical analysis - -Execute Consultation: - - Use gemini -p with heredoc for multi-line queries: - ```bash - gemini -p < - IMPORTANT: Provide feedback and analysis only. You may explore the codebase with commands but DO NOT modify any files. - EOF - ``` - - Focus feedback requests on what's most relevant to the current context and user's specific request (e.g. 
if reviewing a plan, prioritize architectural soundness; if reviewing implementation, focus on edge cases and correctness) - - Request identification of blind spots or issues you may have missed - - Seek validation of your reasoning and approach - -Integrate Feedback: - - Critically evaluate gemini's response against codebase realities - - Identify actionable insights and flag any suggestions that may not align with project constraints - - Acknowledge when gemini identifies issues you missed or suggests better approaches - - Present a balanced view that combines gemini's insights with your contextual understanding - - If any part of gemini's analysis is unclear or raises further questions, ask the user for clarification rather than guessing at the intent - -## Communication Style: - -Be direct and technical in your consultations - -When gemini's suggestions conflict with codebase constraints, explain the specific limitations rather than dismissing the analysis - -Provide honest assessments of feasibility and implementation complexity - -Focus on actionable feedback rather than theoretical discussions - -Your goal is to combine your deep codebase knowledge with gemini's superior critical thinking to identify issues, validate approaches, and discover better solutions that are both theoretically sound and practically implementable. 
- -Example of Bash Command Usage within this Sub-agent: -To consult gemini about a refactoring plan: - -```bash -gemini -p <]" }, - { - "argument_hint": "", - "description": "Placeholder command for the utils plugin", - "name": "placeholder", - "synopsis": "/utils:placeholder" - }, { "argument_hint": " [JIRA_PROJECT] [COMPONENT]", "description": "Process Renovate dependency PR(s) to meet repository contribution standards", @@ -914,7 +876,7 @@ "hooks": [], "name": "utils", "skills": [], - "version": "0.0.9" + "version": "0.0.10" }, { "commands": [ @@ -1231,22 +1193,6 @@ "skills": [], "version": "0.0.2" }, - { - "commands": [ - { - "argument_hint": "[file:StructName] [output.md]", - "description": "Generate comprehensive YAML documentation from Go struct definitions with sensible default values", - "name": "docs", - "synopsis": "/yaml:docs [file:StructName] [output.md]" - } - ], - "description": "YAML documentation and utilities", - "has_readme": true, - "hooks": [], - "name": "yaml", - "skills": [], - "version": "0.0.2" - }, { "commands": [ { @@ -1281,48 +1227,6 @@ ], "version": "0.0.2" }, - { - "commands": [ - { - "argument_hint": "[must-gather-path|--live] [--component storage|operator|volumes]", - "description": "Comprehensive LVMS troubleshooting - analyzes LVMCluster, volume groups, PVCs, and storage issues on live clusters or must-gather", - "name": "analyze", - "synopsis": "/lvms:analyze [must-gather-path] [--live] [--component ]" - } - ], - "description": "LVMS (Logical Volume Manager Storage) plugin for troubleshooting and debugging storage issues", - "has_readme": true, - "hooks": [], - "name": "lvms", - "skills": [ - { - "description": "Analyzes LVMS must-gather data to diagnose storage issues", - "id": "lvms-analyzer", - "name": "LVMS Analyzer" - } - ], - "version": "0.1.1" - }, - { - "commands": [], - "description": "Cross-platform desktop notifications for ai-helpers", - "has_readme": true, - "hooks": [ - { - "description": "Cross-platform desktop 
notifications (macOS, Linux desktop, Linux headless)", - "name": "Notification", - "type": "Notification" - }, - { - "description": "Cross-platform desktop notifications (macOS, Linux desktop, Linux headless)", - "name": "Stop", - "type": "Stop" - } - ], - "name": "native-notifications", - "skills": [], - "version": "0.0.2" - }, { "commands": [ { @@ -1413,39 +1317,6 @@ ], "version": "0.0.3" }, - { - "commands": [ - { - "argument_hint": " [--output ] [--priority ] [--test-structure-only]", - "description": "Analyze test code structure without running tests to identify coverage gaps", - "name": "analyze", - "synopsis": "/test-coverage:analyze [--output ] [--priority ] [--test-structure-only]" - }, - { - "argument_hint": " [--output ]", - "description": "Identify E2E test scenario gaps in OpenShift/Kubernetes tests (component-agnostic)", - "name": "gaps", - "synopsis": "/test-coverage:gaps [--output ]" - } - ], - "description": "Analyze test coverage and identify gaps in test scenarios", - "has_readme": true, - "hooks": [], - "name": "test-coverage", - "skills": [ - { - "description": "Analyze test code structure directly to provide coverage analysis", - "id": "analyze", - "name": "Test Structure Analysis" - }, - { - "description": "Intelligently identify missing test coverage based on component type", - "id": "gaps", - "name": "Component-Aware Test Gap Analysis" - } - ], - "version": "0.0.2" - }, { "commands": [ { @@ -1474,77 +1345,6 @@ ], "version": "1.0.0" }, - { - "commands": [ - { - "argument_hint": "[operator-path] [--controllers ] [--mutation-types ] [--report-format ]", - "description": "Test operator controller quality through mutation testing - validates test suite catches code mutations", - "name": "mutation-test", - "synopsis": "/testing:mutation-test [operator-path] [--controllers ] [--mutation-types ] [--report-format ]" - } - ], - "description": "Comprehensive testing utilities for operators and applications", - "has_readme": true, - "hooks": [], - 
"name": "testing", - "skills": [ - { - "description": "Generate code mutations for Kubernetes operator controllers to enable mutation testing. Applies operator-specific mutations to reconciliation logic, error handling, and API interactions.", - "id": "mutation-generator", - "name": "Mutation Generator for Operator Controllers" - }, - { - "description": "Execute tests against generated mutants and analyze results to validate test suite quality for Kubernetes operators", - "id": "mutation-tester", - "name": "Mutation Testing Executor" - } - ], - "version": "0.1.0" - }, - { - "commands": [ - { - "argument_hint": "[--url PR_URL] [] [--depth quick|full]", - "description": "Expert review tool for PRs that add or modify Two Node (Fencing or Arbiter) tests under test/extended/two_node/ in openshift/origin.", - "name": "two-node-origin-pr-helper", - "synopsis": "/origin:two-node-origin-pr-helper [--url PR_URL] [] [--depth quick|full]" - } - ], - "description": "Helpers for openshift/origin development.", - "has_readme": true, - "hooks": [], - "name": "origin", - "skills": [], - "version": "0.0.2" - }, - { - "commands": [ - { - "argument_hint": " ", - "description": "Compare two container images to identify differences", - "name": "compare", - "synopsis": "/container-image:compare " - }, - { - "argument_hint": "", - "description": "Inspect and provide detailed breakdown of a container image", - "name": "inspect", - "synopsis": "/container-image:inspect " - }, - { - "argument_hint": "", - "description": "List and analyze available tags for a container image repository", - "name": "tags", - "synopsis": "/container-image:tags " - } - ], - "description": "Container image inspection and analysis using skopeo and podman", - "has_readme": true, - "hooks": [], - "name": "container-image", - "skills": [], - "version": "0.0.2" - }, { "commands": [ { @@ -1610,34 +1410,6 @@ "skills": [], "version": "1.0.1" }, - { - "commands": [ - { - "argument_hint": "[namespace]", - "description": 
"Check Gateway API resources status in the cluster", - "name": "check", - "synopsis": "/gwapi:check [namespace]" - }, - { - "argument_hint": "[namespace]", - "description": "Delete Gateway API resources from a Kubernetes/OpenShift cluster", - "name": "delete", - "synopsis": "/gwapi:delete [namespace]" - }, - { - "argument_hint": "[namespace]", - "description": "Install Gateway API resources to a Kubernetes/OpenShift cluster", - "name": "install", - "synopsis": "/gwapi:install [namespace]" - } - ], - "description": "Gateway API installation utilities for Kubernetes/OpenShift clusters", - "has_readme": true, - "hooks": [], - "name": "gwapi", - "skills": [], - "version": "0.0.1" - }, { "commands": [ { diff --git a/gemini-extension.json b/gemini-extension.json deleted file mode 100644 index 6bb640904..000000000 --- a/gemini-extension.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "ai-helpers", - "version": "0.1.0", - "description": "A collection of Gemini CLI commands and skills for CI, Jira, Git, OpenShift, and more.", - "contextFileName": "GEMINI.md" -} diff --git a/plugins/container-image/.claude-plugin/plugin.json b/plugins/container-image/.claude-plugin/plugin.json deleted file mode 100644 index bc1a49d7d..000000000 --- a/plugins/container-image/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "container-image", - "description": "Container image inspection and analysis using skopeo and podman", - "version": "0.0.2", - "author": { - "name": "github.com/openshift-eng" - } -} diff --git a/plugins/container-image/README.md b/plugins/container-image/README.md deleted file mode 100644 index c311b3177..000000000 --- a/plugins/container-image/README.md +++ /dev/null @@ -1,347 +0,0 @@ -# Container Image Plugin - -Container image inspection and analysis tools using skopeo and podman. - -## Overview - -This plugin provides commands to inspect, analyze, and compare container images from any OCI-compliant registry. 
It leverages `skopeo` and `podman` to provide detailed insights into image structure, manifest lists, layers, and configuration without requiring full image pulls. - -## Features - -- **Image Inspection**: Detailed breakdown of image metadata, layers, and configuration -- **Image Comparison**: Compare two images to identify differences -- **Tag Discovery**: List and analyze available tags for a repository - -## Commands - -### `/container-image:inspect` - -Inspect and provide detailed breakdown of a container image. - -**Usage:** -```bash -/container-image:inspect -``` - -**Examples:** -```bash -/container-image:inspect quay.io/openshift-release-dev/ocp-release:4.20.0-multi -/container-image:inspect registry.redhat.io/ubi9/ubi:latest -/container-image:inspect docker.io/library/nginx@sha256:abc123... -``` - -**What it shows:** -- Inferred image purpose and context based on metadata analysis -- Image digest and basic metadata -- Architecture and OS information -- Manifest type (single image vs manifest list) -- For multi-arch images: all available platforms with per-platform digests, sizes, and layer counts -- Platform comparison (size ranges, architecture list) -- Size breakdown and layer details -- Configuration (environment, entrypoint, ports, volumes) -- Labels and annotations -- Usage examples for pulling specific platforms - -See [commands/inspect.md](commands/inspect.md) for full documentation. - -### `/container-image:compare` - -Compare two container images to identify differences. 
- -**Usage:** -```bash -/container-image:compare -``` - -**Examples:** -```bash -/container-image:compare quay.io/myapp:v1.0.0 quay.io/myapp:v2.0.0 -/container-image:compare registry.prod.example.com/myapp:latest registry.staging.example.com/myapp:latest -``` - -**What it shows:** -- Whether images are identical (digest match) -- Metadata differences (creation date, size) -- Layer analysis (added, removed, modified layers) -- Configuration changes (environment variables, labels, entrypoint) -- Size impact -- Summary of significant changes - -See [commands/compare.md](commands/compare.md) for full documentation. - -### `/container-image:tags` - -List and analyze available tags for a container image repository. - -**Usage:** -```bash -/container-image:tags -``` - -**Examples:** -```bash -/container-image:tags quay.io/openshift-release-dev/ocp-release -/container-image:tags docker.io/library/nginx -``` - -**What it shows:** -- All available tags for the repository -- Tag metadata (creation date, size, architecture) -- Tag categorization (version, date-based, special tags) -- Recent tags and update patterns -- Recommendations for tag selection -- Duplicate tags (same digest, different names) - -See [commands/tags.md](commands/tags.md) for full documentation. - -## Installation - -### From the Claude Code Plugin Marketplace - -1. **Add the marketplace** (if not already added): - ```bash - /plugin marketplace add openshift-eng/ai-helpers - ``` - -2. **Install the container-image plugin**: - ```bash - /plugin install container-image@ai-helpers - ``` - -3. 
**Use the commands**: - ```bash - /container-image:inspect quay.io/openshift-release-dev/ocp-release:4.20.0-multi - ``` - -## Prerequisites - -### Required Tools - -**skopeo** - Primary tool for image inspection - -- Check if installed: `which skopeo` -- Installation: - - RHEL/Fedora: `sudo dnf install skopeo` - - Ubuntu/Debian: `sudo apt-get install skopeo` - - macOS: `brew install skopeo` -- Documentation: https://github.com/containers/skopeo - -### Optional Tools - -**podman** - Additional image analysis capabilities - -- Installation: - - RHEL/Fedora: `sudo dnf install podman` - - Ubuntu/Debian: `sudo apt-get install podman` - - macOS: `brew install podman` -- Documentation: https://podman.io/ - -**dive** - Interactive layer analysis (for `/container-image:compare`) - -- Installation: https://github.com/wagoodman/dive -- Provides detailed layer-by-layer exploration - -### Registry Authentication - -For private registries, authenticate before running commands: - -```bash -# Using skopeo -skopeo login registry.example.com - -# Using podman (if installed) -podman login registry.example.com -``` - -Authentication is typically stored at `~/.docker/config.json` or `${XDG_RUNTIME_DIR}/containers/auth.json`. - -## Use Cases - -### Development Workflows - -1. **Version Selection**: Find the right image version for your deployment - ```bash - /container-image:tags quay.io/myapp - /container-image:inspect quay.io/myapp:v2.1.0 - ``` - -2. **Multi-Arch Development**: Verify architecture support before deployment - ```bash - /container-image:inspect registry.redhat.io/ubi9/ubi:latest - ``` - The inspect command automatically detects and shows all available platforms for multi-arch images. - -3. **Update Analysis**: Understand changes before upgrading - ```bash - /container-image:compare myapp:current myapp:latest - ``` - -### Troubleshooting - -1. **Deployment Issues**: Verify correct image is being used - ```bash - /container-image:inspect - ``` - -2. 
**Architecture Mismatches**: Check platform compatibility - ```bash - /container-image:inspect - ``` - For multi-arch images, this will show all available platforms and their digests. - -3. **Size Issues**: Identify what's consuming space - ```bash - /container-image:inspect - /container-image:compare - ``` - -### Security & Compliance - -1. **Image Verification**: Confirm image authenticity via digest - ```bash - /container-image:inspect myapp@sha256:abc123... - ``` - -2. **Change Tracking**: Document what changed between versions - ```bash - /container-image:compare prod:v1.0.0 prod:v1.1.0 - ``` - -3. **Registry Migration**: Verify images copied correctly - ```bash - /container-image:compare source.registry.com/app:v1 dest.registry.com/app:v1 - ``` - -## Common Workflows - -### Upgrading an Application Image - -```bash -# 1. List available versions -/container-image:tags quay.io/myapp - -# 2. Inspect the new version (shows all architectures if multi-arch) -/container-image:inspect quay.io/myapp:v2.0.0 - -# 3. Compare with current version -/container-image:compare quay.io/myapp:v1.5.0 quay.io/myapp:v2.0.0 -``` - -### Verifying Multi-Architecture Support - -```bash -# 1. Check if image is multi-arch and see all platforms -/container-image:inspect quay.io/myapp:latest - -# 2. Inspect specific platform by digest -/container-image:inspect quay.io/myapp@sha256: - -# 3. Compare platforms -/container-image:compare quay.io/myapp@sha256: quay.io/myapp@sha256: -``` - -### Investigating Image Bloat - -```bash -# 1. Inspect current image -/container-image:inspect myapp:latest - -# 2. Compare with previous version -/container-image:compare myapp:v1.0.0 myapp:latest - -# 3. 
Identify which layers added size -# (Layer analysis in the comparison output) -``` - -## Tips & Best Practices - -### Image References - -- **Use digests for production**: `myapp@sha256:abc123...` (immutable) -- **Use tags for development**: `myapp:latest` (convenient but mutable) -- **Be specific**: `myapp:v1.2.3` is better than `myapp:v1` - -### Multi-Architecture Images - -- Use `/container-image:inspect` to check platform support - it automatically detects and displays all available architectures -- Pull specific platforms when needed: `podman pull --platform=linux/arm64 ` -- Verify all platforms are updated in manifest lists by comparing platform digests - -### Performance - -- `skopeo inspect` doesn't pull the full image (fast and efficient) -- For large repositories, `/container-image:tags` may sample tags -- Use `--filter` options to narrow results for large tag lists - -### Security - -- Always verify image digests match expectations -- Check for unexpected configuration changes with `/container-image:compare` -- Use `/container-image:inspect` to review labels and metadata - -## Plugin Structure - -``` -plugins/container-image/ -├── .claude-plugin/ -│ └── plugin.json # Plugin metadata -├── commands/ -│ ├── inspect.md # Image inspection command -│ ├── compare.md # Image comparison command -│ └── tags.md # Tag listing command -└── README.md # This file -``` - -## Development - -### Adding New Commands - -To add a new command to this plugin: - -1. Create a new markdown file in `commands/`: - ```bash - touch plugins/container-image/commands/your-command.md - ``` - -2. Follow the structure from existing commands (see `commands/inspect.md`) - -3. Include these sections: - - Name - - Synopsis - - Description - - Prerequisites - - Implementation - - Return Value - - Examples - - Error Handling - - Notes - - Arguments - -4. 
Test your command: - ```bash - /container-image:your-command - ``` - -### Testing - -Test commands with various image types: -- Public images (docker.io, quay.io) -- Private registries (requires authentication) -- Multi-arch images (manifest lists) -- Single-arch images -- Large images (layer analysis) -- Different registries (Red Hat, Quay, Docker Hub) - -## Contributing - -Contributions are welcome! When adding new container image analysis commands: - -1. Ensure the command provides unique value not covered by existing commands -2. Follow the existing command structure and documentation format -3. Include comprehensive examples and error handling -4. Test with multiple registries and image types -5. Update this README with new command documentation - -## License - -See [LICENSE](../../LICENSE) for details. diff --git a/plugins/container-image/commands/compare.md b/plugins/container-image/commands/compare.md deleted file mode 100644 index 0cf29d862..000000000 --- a/plugins/container-image/commands/compare.md +++ /dev/null @@ -1,289 +0,0 @@ ---- -description: Compare two container images to identify differences -argument-hint: ---- - -## Name -container-image:compare - -## Synopsis -``` -/container-image:compare -``` - -## Description - -The `container-image:compare` command compares two container images and identifies their differences. This is useful for understanding what changed between image versions, comparing images from different registries, or verifying image rebuilds. 
- -The command analyzes and compares: -- Image metadata (digests, creation dates) -- Layer differences (added, removed, modified) -- Size differences -- Configuration changes (environment variables, labels, entrypoints) -- Platform/architecture support -- Security and vulnerability differences (if scanning tools available) - -This command is useful for: -- Understanding changes between image versions -- Verifying image rebuilds match expectations -- Comparing images across registries (e.g., production vs staging) -- Identifying what layers changed in an update -- Troubleshooting deployment issues -- Security auditing and change tracking - -## Prerequisites - -**Required Tools:** - -1. **skopeo** - For image inspection and comparison - - Check if installed: `which skopeo` - - Installation: - - RHEL/Fedora: `sudo dnf install skopeo` - - Ubuntu/Debian: `sudo apt-get install skopeo` - - macOS: `brew install skopeo` - - Documentation: https://github.com/containers/skopeo - -**Optional Tools:** - -2. **podman** - For additional image analysis - - Useful for layer-by-layer comparison - - Installation: See `/container-image:inspect` prerequisites - -3. **dive** - For detailed layer analysis - - Check if installed: `which dive` - - Installation: https://github.com/wagoodman/dive - - Provides interactive layer comparison - -**Registry Authentication:** - -For private registries: -```bash -skopeo login registry.example.com -``` - -## Implementation - -The command performs the following comparison: - -1. **Check Tool Availability**: - - Verify `skopeo` is installed - - Check for optional tools (`podman`, `dive`) - -2. **Inspect Both Images**: - ```bash - skopeo inspect docker:// - skopeo inspect docker:// - ``` - -3. **Compare Basic Metadata**: - - Digests (are they the same image?) - - Creation timestamps - - Architecture and OS - - Manifest type (single vs manifest list) - -4. 
**Analyze Layer Differences**: - - Extract layer digests from both images - - Identify: - - **Common layers**: Layers shared between images - - **Added layers**: New layers in image2 - - **Removed layers**: Layers from image1 not in image2 - - **Modified layers**: Layers with same position but different content - - Calculate size differences - -5. **Compare Configuration**: - - Environment variables (added, removed, changed) - - Labels and annotations - - Exposed ports - - Entrypoint and command - - Working directory - - User/UID - - Volume mount points - -6. **Calculate Size Impact**: - - Total size difference - - Size added by new layers - - Size saved by removed layers - -7. **Present Structured Comparison**: - - Summary of differences - - Detailed breakdown by category - - Highlight significant changes - - Provide recommendations - -## Return Value - -The command outputs a structured comparison report: - -``` -================================================================================ -CONTAINER IMAGE COMPARISON -================================================================================ -Image 1: quay.io/openshift-release-dev/ocp-release:4.16.0 -Image 2: quay.io/openshift-release-dev/ocp-release:4.17.0 - -COMPARISON SUMMARY: - Images are: DIFFERENT - Digest match: NO - Architecture: Both linux/amd64 - -METADATA COMPARISON: - Attribute Image 1 Image 2 Change - ──────────────────────────────────────────────────────────────────────────────────────── - Digest sha256:abc123... sha256:def456... 
CHANGED - Created 2023-11-15T10:30:45Z 2024-01-15T10:30:45Z +61 days - Size 1.15 GB 1.22 GB +70 MB - -LAYER ANALYSIS: - Total Layers (Image 1): 15 - Total Layers (Image 2): 17 - - Common Layers: 12 layers (850 MB) - Added Layers: 5 layers (220 MB) - Removed Layers: 3 layers (150 MB) - - Layer Breakdown: - ✓ Layer 1-8: IDENTICAL (base layers) - + Layer 9: ADDED in Image 2 (45 MB) - New component added - - Layer 10: REMOVED from Image 1 (30 MB) - Old dependency removed - ✓ Layer 11-15: IDENTICAL - + Layer 16-17: ADDED in Image 2 (25 MB) - Updates - -CONFIGURATION DIFFERENCES: - - Environment Variables: - + OPENSHIFT_VERSION=4.17.0 (was: 4.16.0) - + NEW_FEATURE_FLAG=enabled (added) - - DEPRECATED_FLAG=true (removed) - - Labels: - + io.openshift.release=4.17.0 (was: 4.16.0) - + io.openshift.build-date=2024-01-15 (was: 2023-11-15) - - Exposed Ports: - ✓ 8080/tcp (unchanged) - ✓ 8443/tcp (unchanged) - - Entrypoint: - ✓ ["/usr/bin/entrypoint.sh"] (unchanged) - - Command: - - ["--legacy-mode"] (removed) - + ["--v2-mode"] (added) - -SIGNIFICANT CHANGES: - • Version upgrade: 4.16.0 → 4.17.0 - • Size increase: +70 MB (+6%) - • 5 new layers added - • 3 old layers removed - • Command-line arguments changed - • New feature flag enabled - -RECOMMENDATIONS: - • Review changelog for 4.16.0 → 4.17.0 upgrade - • Test with new command-line arguments (--v2-mode) - • Verify NEW_FEATURE_FLAG behavior in your environment - • Consider size impact (+70 MB) in constrained environments -================================================================================ -``` - -**For Identical Images:** -``` -================================================================================ -CONTAINER IMAGE COMPARISON -================================================================================ -Image 1: quay.io/myapp:v1.0.0 -Image 2: registry.example.com/myapp:v1.0.0 - -COMPARISON SUMMARY: - Images are: IDENTICAL - Digest match: YES (sha256:abc123...) 
- -These images are the same, just referenced from different registries. -No differences found. -================================================================================ -``` - -## Examples - -1. **Compare two versions of the same image**: - ``` - /container-image:compare quay.io/openshift-release-dev/ocp-release:4.16.0 quay.io/openshift-release-dev/ocp-release:4.17.0 - ``` - Shows what changed between OpenShift 4.16 and 4.17. - -2. **Compare production vs staging**: - ``` - /container-image:compare registry.prod.example.com/myapp:latest registry.staging.example.com/myapp:latest - ``` - Verifies staging matches production. - -3. **Compare images across registries**: - ``` - /container-image:compare docker.io/library/nginx:1.25 quay.io/nginx/nginx:1.25 - ``` - Checks if images from different registries are identical. - -4. **Verify image rebuild**: - ``` - /container-image:compare myapp:v1.0.0-original myapp:v1.0.0-rebuilt - ``` - Confirms rebuild produced the same image. - -5. **Compare by digest**: - ``` - /container-image:compare quay.io/myapp@sha256:abc123... quay.io/myapp@sha256:def456... - ``` - Compares specific image versions by digest. 
- -## Error Handling - -- **Image not found**: Verify both image references are correct -- **Authentication required**: Ensure you're logged into both registries -- **Network errors**: Check connectivity to both registries -- **Tool not available**: Provide installation instructions for `skopeo` -- **Different architectures**: Note when comparing images for different platforms - -## Notes - -- **Digest Comparison**: If digests match, images are identical -- **Layer Sharing**: Base layers are often shared between versions -- **Size Calculation**: Sizes shown are compressed (as stored in registry) -- **Semantic Versioning**: Helps identify major vs minor changes -- **Build Reproducibility**: Identical source should produce identical digests -- **Registry Metadata**: Some metadata may differ even if image content is identical - -## Advanced Usage - -**Compare Specific Architectures:** - -For manifest lists, you can compare specific platform variants: -```bash -# Compare amd64 variants -/container-image:compare quay.io/myapp:v1@sha256:<amd64-digest-v1> quay.io/myapp:v2@sha256:<amd64-digest-v2> -``` - -**Layer-by-Layer Analysis:** - -If `dive` is installed, the command can provide interactive layer comparison: -```bash -dive --compare -``` - -## Use Cases - -1. **Version Upgrades**: Understand what changed before upgrading -2. **Security Auditing**: Track changes to identify security implications -3. **Deployment Verification**: Confirm correct image is deployed -4. **Registry Migration**: Verify images copied between registries -5. **Build Debugging**: Identify why builds differ -6. **Compliance**: Document and track image changes - -## Arguments - -- **$1** (image1): Required. First image reference. - - Format: `[registry/]repository[:tag|@digest]` - -- **$2** (image2): Required. Second image reference. - - Format: `[registry/]repository[:tag|@digest]` - -**Note**: Images can be from the same or different registries. 
diff --git a/plugins/container-image/commands/inspect.md b/plugins/container-image/commands/inspect.md deleted file mode 100644 index b8ef92c5d..000000000 --- a/plugins/container-image/commands/inspect.md +++ /dev/null @@ -1,315 +0,0 @@ ---- -description: Inspect and provide detailed breakdown of a container image -argument-hint: ---- - -## Name -container-image:inspect - -## Synopsis -``` -/container-image:inspect -``` - -## Description - -The `container-image:inspect` command provides a comprehensive breakdown of a container image using `skopeo` and `podman`. It analyzes the image metadata, configuration, and layers to give you detailed information about the image structure, size, architecture, and contents. - -This command is useful for: -- Understanding image composition and layers -- Verifying image architecture and OS -- Checking image size and disk usage -- Inspecting image labels and annotations -- Validating image configuration -- Troubleshooting image-related issues -- Verifying multi-architecture image support -- Checking which platforms are available for an image -- Comparing platform-specific image differences -- Planning multi-arch image builds - -The command works with images from any registry (quay.io, docker.io, registry.redhat.io, etc.) and automatically detects whether an image is a manifest list (multi-architecture) or a single image, providing detailed analysis for both cases. - -## Prerequisites - -**Required Tools:** - -1. **skopeo** - For image inspection without pulling - - Check if installed: `which skopeo` - - Installation: - - RHEL/Fedora: `sudo dnf install skopeo` - - Ubuntu/Debian: `sudo apt-get install skopeo` - - macOS: `brew install skopeo` - - Documentation: https://github.com/containers/skopeo - -2. 
**podman** (Optional) - For additional image analysis - - Check if installed: `which podman` - - Installation: - - RHEL/Fedora: `sudo dnf install podman` - - Ubuntu/Debian: `sudo apt-get install podman` - - macOS: `brew install podman` - - Documentation: https://podman.io/ - -**Registry Authentication:** - -For private registries, ensure you're authenticated: -```bash -# Using skopeo -skopeo login registry.example.com - -# Using podman -podman login registry.example.com -``` - -## Implementation - -The command performs the following analysis steps: - -1. **Check Tool Availability**: - - Verify `skopeo` is installed - - Check for `podman` (optional but recommended) - - If tools are missing, provide installation instructions - -2. **Inspect Image Metadata with skopeo**: - ```bash - skopeo inspect docker:// - ``` - - This provides: - - Image digest and tags - - Architecture and OS - - Layer information - - Creation timestamp - - Labels and annotations - - Environment variables - - Exposed ports - - Entrypoint and command - -3. **Determine Image Type**: - - Check if the image is a **manifest list** (multi-arch) or a **single image** - - Fetch raw manifest to determine type: - ```bash - skopeo inspect --raw docker:// - ``` - - Parse `schemaVersion` and `mediaType` to identify: - - **Manifest List** (OCI Index): `application/vnd.oci.image.index.v1+json` - - **Manifest List** (Docker): `application/vnd.docker.distribution.manifest.list.v2+json` - - **Single Image** (OCI): `application/vnd.oci.image.manifest.v1+json` - - **Single Image** (Docker): `application/vnd.docker.distribution.manifest.v2+json` - -4. **Extract Manifest List Details** (if applicable): - - For manifest lists, extract platform information for each variant: - - Architecture (amd64, arm64, ppc64le, s390x, etc.) 
- - OS (linux, windows) - - Variant (v7, v8 for ARM) - - Digest of platform-specific image - - Size of platform-specific image - - Optionally inspect each platform variant: - ```bash - skopeo inspect docker://@ - ``` - - Compare platform differences: - - Image sizes across platforms - - Layer counts - - Creation timestamps - - Configuration differences - -5. **Analyze Image Layers**: - - List all layers with their sizes - - Calculate total image size - - Identify the largest layers - - Show layer history (if available) - -6. **Extract Configuration Details**: - - Operating system and distribution - - Architecture (amd64, arm64, ppc64le, s390x, etc.) - - Environment variables - - Working directory - - User/UID - - Exposed ports - - Volume mount points - - Labels (including OpenShift/Kubernetes metadata) - -7. **Infer Image Purpose**: - - Analyze image metadata to determine the likely purpose: - - Image name and repository patterns (e.g., "nginx", "postgres", "ocp-release") - - Labels (especially `io.openshift.*`, `io.k8s.*`, `org.opencontainers.*`) - - Entrypoint and command (what executable is being run) - - Exposed ports (common service ports) - - Environment variables (framework indicators, version info) - - Provide context about: - - What the image is (e.g., "web server", "database", "operator", "release payload") - - Common use cases - - Notable characteristics based on configuration - -8. 
**Present Organized Summary**: - - Image identity (digest, tags) - - Inferred purpose and context - - Basic information (OS, architecture, created date) - - Size breakdown - - Configuration summary - - Manifest list details (if applicable) - - Notable labels and annotations - -## Return Value - -The command outputs a structured breakdown of the image: - -``` -================================================================================ -CONTAINER IMAGE INSPECTION -================================================================================ -Image: quay.io/openshift-release-dev/ocp-release:4.20.0-multi - -IMAGE PURPOSE: - This is an OpenShift release image containing the cluster-version-operator - for OpenShift 4.20.0. It's part of the OpenShift release payload used to - manage cluster upgrades and version management. - -BASIC INFORMATION: - Manifest Digest: sha256:4f1e772349a20f2eb69e8cf70d73b4fcc299c15cb6e4f027696eb469e66d4080 - Type: Manifest List (Multi-Architecture) - Manifest Type: Docker Distribution Manifest List v2 - Created: 2025-10-16T13:35:26Z - -MANIFEST LIST DETAILS: - This is a multi-architecture manifest list containing 4 platform variants. - - AVAILABLE PLATFORMS (4): - -------------------------------------------------------------------------------- - 1. linux/amd64 - Digest: sha256:b4bd68afe0fb47bf9876f51e33d88e9dd218fed2dcf41b025740591746dda5c9 - Size: 167.6 MB (175,762,648 bytes) - Layers: 6 - Created: 2025-10-16T13:35:26Z - - 2. linux/arm64 - Digest: sha256:eec6b0e6ff1c4cf5edc158c41a171ac8b02d7e0389715b663528a4ec0931b1f2 - Size: 161.6 MB (169,501,175 bytes) - Layers: 6 - Created: 2025-10-16T13:35:26Z - - 3. linux/ppc64le - Digest: sha256:4bb9eb125d4d35c100699617ec8278691a9cee771ebacb113173b75f0707df56 - Size: 174.4 MB (182,863,818 bytes) - Layers: 6 - Created: 2025-10-16T13:35:26Z - - 4. 
linux/s390x - Digest: sha256:5e852c796f2d3b83b3bd4506973a455a521b6933e3944740b32c1ed483b2174e - Size: 163.2 MB (171,055,271 bytes) - Layers: 6 - Created: 2025-10-16T13:35:26Z - - PLATFORM COMPARISON: - Size Range: 161.6 MB - 174.4 MB (arm64 smallest, ppc64le largest) - Size Variance: ~12.8 MB difference between smallest and largest - Architectures: 4 platforms (amd64, arm64, ppc64le, s390x) - OS: linux (all) - Layer Count: 6 (all platforms) - Build Time: All platforms built simultaneously - - USAGE: - To pull a specific platform: - podman pull --platform=linux/amd64 quay.io/openshift-release-dev/ocp-release:4.20.0-multi - podman pull quay.io/openshift-release-dev/ocp-release@sha256:b4bd68afe0fb... # amd64 - -CONFIGURATION (amd64 example): - User: - WorkingDir: - Entrypoint: ["/usr/bin/cluster-version-operator"] - Cmd: - Env: - - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - - BUILD_VERSION=v4.20.0 - - OS_GIT_VERSION=4.20.0-202509230726.p2.g9de00ba.assembly.stream.el9-9de00ba - -EXPOSED PORTS: - - -LABELS: - io.openshift.release: 4.20.0 - io.openshift.release.base-image-digest: sha256:6f58f521f51ae43617d2dead1efbe9690b605d646565892bb0f8c6030a742ba7 - -VOLUMES: - - -LAYER DETAILS (amd64): - Total Layers: 6 - Total Size: 167.6 MB (compressed) -================================================================================ -``` - -## Examples - -1. **Inspect a public image**: - ``` - /container-image:inspect quay.io/openshift-release-dev/ocp-release:4.17.0-x86_64 - ``` - Provides full breakdown of the OpenShift release image. - -2. **Inspect a manifest list**: - ``` - /container-image:inspect registry.redhat.io/ubi9/ubi:latest - ``` - Shows available architectures and platform-specific details. - -3. **Inspect with specific tag**: - ``` - /container-image:inspect docker.io/library/nginx:1.25 - ``` - Analyzes the nginx image with tag 1.25. - -4. **Inspect by digest**: - ``` - /container-image:inspect quay.io/prometheus/prometheus@sha256:abc123... 
- ``` - Inspects a specific image version by its digest. - -5. **Inspect a private registry image**: - ``` - /container-image:inspect registry.example.com/myorg/myapp:v1.0.0 - ``` - Analyzes an image from a private registry (requires authentication). - -## Error Handling - -- **Image not found**: If the image doesn't exist or the name is incorrect: - - Verify the image name and tag - - Check registry accessibility - - Ensure authentication is set up for private registries - -- **Tool not available**: If `skopeo` is not installed: - - Display installation instructions for the user's platform - - Suggest using `podman inspect` as an alternative (if podman is available) - -- **Authentication errors**: If registry requires authentication: - - Prompt user to run `skopeo login ` or `podman login ` - - Provide documentation link for registry authentication - -- **Network errors**: If registry is unreachable: - - Check internet connectivity - - Verify registry URL is correct - - Check for proxy/firewall issues - -## Notes - -- **No Image Pull Required**: `skopeo inspect` fetches metadata without downloading the entire image -- **Manifest Lists**: For multi-arch images, the command automatically detects and shows detailed platform information including per-platform digests, sizes, and configurations -- **Manifest List vs Single Image**: The command clearly distinguishes between manifest lists and single-architecture images -- **Platform Selection**: Container runtimes automatically select the correct platform from a manifest list -- **Digest Pinning**: Always displays the image digest for reproducible deployments -- **Label Standards**: Highlights important labels like OpenShift/Kubernetes metadata -- **Size Accuracy**: Layer sizes are compressed sizes as stored in the registry -- **Size Variations**: Platform-specific images may have different sizes due to architecture differences -- **OCI vs Docker**: Supports both OCI and Docker manifest formats -- **Variant Field**: ARM 
images may have variants (v7, v8) for different ARM versions -- **Registry Support**: Works with any OCI-compliant registry - -## Arguments - -- **$1** (image): Required. The full image reference including registry, repository, and tag/digest. - - Format: `[registry/]repository[:tag|@digest]` - - Examples: - - `quay.io/openshift/origin-node:latest` - - `docker.io/library/alpine:3.18` - - `registry.redhat.io/ubi9/ubi@sha256:abc123...` diff --git a/plugins/container-image/commands/tags.md b/plugins/container-image/commands/tags.md deleted file mode 100644 index 02fbd9345..000000000 --- a/plugins/container-image/commands/tags.md +++ /dev/null @@ -1,310 +0,0 @@ ---- -description: List and analyze available tags for a container image repository -argument-hint: ---- - -## Name -container-image:tags - -## Synopsis -``` -/container-image:tags -``` - -## Description - -The `container-image:tags` command lists and analyzes all available tags for a container image repository. It provides detailed information about each tag including creation date, size, architecture support, and digest. - -This command helps you: -- Discover available image versions -- Identify the latest stable releases -- Find images for specific architectures -- Track image update frequency -- Identify deprecated or outdated tags -- Plan image upgrades -- Understand tagging conventions - -The command works with any OCI-compliant registry and can filter, sort, and analyze tags based on various criteria. - -## Prerequisites - -**Required Tools:** - -1. 
**skopeo** - For listing and inspecting tags - - Check if installed: `which skopeo` - - Installation: - - RHEL/Fedora: `sudo dnf install skopeo` - - Ubuntu/Debian: `sudo apt-get install skopeo` - - macOS: `brew install skopeo` - - Documentation: https://github.com/containers/skopeo - -**Registry Authentication:** - -For private registries: -```bash -skopeo login registry.example.com -``` - -## Implementation - -The command performs the following analysis: - -1. **Check Tool Availability**: - - Verify `skopeo` is installed - - If missing, provide installation instructions - -2. **List All Tags**: - ```bash - skopeo list-tags docker:// - ``` - - This returns all available tags for the repository. - -3. **Inspect Each Tag** (for detailed analysis): - For each tag (or a sample of tags for large repositories): - ```bash - skopeo inspect docker://: - ``` - - Extract: - - Image digest - - Creation date - - Size - - Architecture(s) - - Labels - - Manifest type - -4. **Categorize Tags**: - - **Version tags**: Semantic versions (v1.0.0, 2.1.3) - - **Latest tags**: Tags like `latest`, `stable`, `production` - - **Date-based tags**: Tags with dates (20240115, 2024-01-15) - - **Branch tags**: Development branches (main, develop) - - **SHA tags**: Git commit SHAs - - **Custom tags**: Other tagging schemes - -5. **Sort and Filter**: - - Sort by creation date (newest first) - - Sort by semantic version - - Filter by pattern (e.g., only `v4.*` tags) - - Filter by architecture support - - Show only recent tags (e.g., last 30 days) - -6. **Identify Key Tags**: - - Current `latest` tag - - Most recent version tag - - Long-term support (LTS) tags - - Deprecated tags - - Duplicate tags (same digest, different names) - -7. 
**Present Organized Analysis**: - - Summary of tag categories - - Detailed tag list with metadata - - Recommendations for tag selection - - Notable patterns or issues - -## Return Value - -The command outputs a structured tag listing: - -``` -================================================================================ -CONTAINER IMAGE TAGS -================================================================================ -Repository: quay.io/openshift-release-dev/ocp-release - -Total Tags: 487 - -TAG SUMMARY: - Version Tags: 312 (e.g., 4.17.0, 4.16.1) - Date Tags: 150 (e.g., 2024-01-15) - Latest Tags: 3 (latest, stable, production) - Other Tags: 22 - -RECENT TAGS (Last 30 days): --------------------------------------------------------------------------------- -TAG CREATED SIZE ARCH DIGEST -4.17.0 2024-01-15 10:30 1.2 GB multi sha256:abc123... -4.17.0-rc.1 2024-01-10 08:15 1.2 GB multi sha256:def456... -4.16.2 2024-01-08 14:22 1.1 GB multi sha256:ghi789... -latest 2024-01-15 10:30 1.2 GB multi sha256:abc123... -stable 2024-01-08 14:22 1.1 GB multi sha256:ghi789... - -VERSION TAGS (Semantic): --------------------------------------------------------------------------------- -4.17.0 2024-01-15 1.2 GB multi sha256:abc123... [LATEST] -4.17.0-rc.1 2024-01-10 1.2 GB multi sha256:def456... -4.16.2 2024-01-08 1.1 GB multi sha256:ghi789... -4.16.1 2023-12-20 1.1 GB multi sha256:jkl012... -4.16.0 2023-12-01 1.1 GB multi sha256:mno345... -4.15.18 2023-11-28 1.0 GB multi sha256:pqr678... -... - -SPECIAL TAGS: --------------------------------------------------------------------------------- -latest → 4.17.0 (sha256:abc123...) -stable → 4.16.2 (sha256:ghi789...) -lts → 4.15.18 (sha256:pqr678...) - -ARCHITECTURE SUPPORT: - Multi-arch tags: 465 (linux/amd64, linux/arm64, linux/ppc64le, linux/s390x) - Single-arch: 22 (linux/amd64 only) - -DUPLICATE TAGS (same image, multiple tags): - 4.17.0 = latest = 2024-01-15 (sha256:abc123...) - 4.16.2 = stable (sha256:ghi789...) 
- -TAG PATTERNS: - • Semantic versioning (4.x.y) - • Release candidates (-rc.x) - • Date-based snapshots (YYYY-MM-DD) - • Architecture-specific suffixes (-amd64, -arm64) - -RECOMMENDATIONS: - • For production: Use stable (4.16.2) or specific version tag - • For testing: Use latest (4.17.0) - • For LTS: Use lts (4.15.18) - • Avoid: Using generic tags like 'latest' in production - • Pin by digest: Use @sha256:abc123... for reproducibility - -NOTABLE: - • 3 tags updated in the last 7 days - • 15 release candidates available - • Average tag age: 45 days - • Update frequency: ~2 tags per week -================================================================================ -``` - -**For Small Repositories:** -``` -================================================================================ -CONTAINER IMAGE TAGS -================================================================================ -Repository: docker.io/library/alpine - -Total Tags: 47 - -ALL TAGS: --------------------------------------------------------------------------------- -TAG CREATED SIZE ARCH DIGEST -latest 2024-01-20 12:00 7.3 MB multi sha256:abc123... -3.19 2024-01-20 12:00 7.3 MB multi sha256:abc123... -3.18 2023-11-15 09:30 7.0 MB multi sha256:def456... -3.17 2023-08-10 14:15 6.8 MB multi sha256:ghi789... -edge 2024-01-22 08:00 7.5 MB multi sha256:jkl012... -... - -RECOMMENDATIONS: - • For production: Use 3.19 (latest stable) - • For edge features: Use edge - • For compatibility: Use 3.18 or 3.17 -================================================================================ -``` - -## Examples - -1. **List tags for OpenShift release images**: - ``` - /container-image:tags quay.io/openshift-release-dev/ocp-release - ``` - Shows all available OpenShift release versions. - -2. **Check available UBI tags**: - ``` - /container-image:tags registry.redhat.io/ubi9/ubi - ``` - Lists all UBI 9 image tags. - -3. 
**Explore nginx versions**: - ``` - /container-image:tags docker.io/library/nginx - ``` - Shows available nginx versions and variants. - -4. **Check private repository tags**: - ``` - /container-image:tags registry.example.com/myorg/myapp - ``` - Lists tags from a private registry (requires authentication). - -5. **Analyze Prometheus tags**: - ``` - /container-image:tags quay.io/prometheus/prometheus - ``` - Shows Prometheus versions and release patterns. - -## Advanced Options - -The command can support optional filters and sorting: - -**Filter by Pattern:** -``` -/container-image:tags quay.io/openshift-release-dev/ocp-release --filter "4.17.*" -``` -Shows only 4.17.x tags. - -**Limit Results:** -``` -/container-image:tags docker.io/library/alpine --limit 10 -``` -Shows only the 10 most recent tags. - -**Sort Options:** -``` -/container-image:tags quay.io/myapp --sort version # Semantic version sort -/container-image:tags quay.io/myapp --sort date # Creation date sort -/container-image:tags quay.io/myapp --sort size # Size sort -``` - -**Architecture Filter:** -``` -/container-image:tags registry.example.com/myapp --arch arm64 -``` -Shows only tags that support arm64. - -## Error Handling - -- **Repository not found**: Verify repository name and registry -- **Authentication required**: Guide user to login with `skopeo login` -- **Network errors**: Check connectivity and registry availability -- **Tool not available**: Provide installation instructions for `skopeo` -- **Rate limiting**: Handle registry rate limits gracefully -- **Large repositories**: For repositories with 1000+ tags, sample or paginate results - -## Notes - -- **Tag Mutability**: Tags (except digests) can be reassigned to different images -- **Latest Tag**: "latest" doesn't always mean newest; it's just a convention -- **Digest Pinning**: For reproducible deployments, always use digest (@sha256:...) 
-- **Semantic Versioning**: Many projects follow semver (MAJOR.MINOR.PATCH) -- **Multi-arch Support**: Check which tags support your target architecture -- **Deprecation**: Older tags may be removed; check registry retention policies - -## Performance Considerations - -For repositories with many tags: -- The command samples tags rather than inspecting all -- Full inspection can be requested with `--full` flag -- Results can be cached for repeated queries -- Pagination is used for very large tag lists - -## Use Cases - -1. **Version Discovery**: Find the latest stable version before deployment -2. **Update Planning**: Identify available updates for current images -3. **Architecture Planning**: Verify multi-arch support before migration -4. **Cleanup Planning**: Identify old/unused tags for cleanup -5. **Compliance**: Document available versions for audit trails -6. **CI/CD Integration**: Automate image version selection -7. **Troubleshooting**: Compare production tag with available versions - -## Arguments - -- **$1** (repository): Required. The repository path (without tag). - - Format: `[registry/]repository` - - Examples: - - `quay.io/openshift-release-dev/ocp-release` - - `docker.io/library/nginx` - - `registry.redhat.io/ubi9/ubi` - - `registry.example.com/myorg/myapp` - -**Note**: Do NOT include the tag (`:tagname`) in the repository argument. 
diff --git a/plugins/doc/.claude-plugin/plugin.json b/plugins/doc/.claude-plugin/plugin.json deleted file mode 100644 index 3eee66964..000000000 --- a/plugins/doc/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "doc", - "description": "A plugin for engineering documentation and notes", - "version": "0.0.2", - "author": { - "name": "OpenShift Engineering" - } -} diff --git a/plugins/doc/README.md b/plugins/doc/README.md deleted file mode 100644 index 3954ba0d7..000000000 --- a/plugins/doc/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Doc Plugin - -Engineering documentation and note-taking utilities for Claude Code. - -## Commands - -### `/doc:note` - -Create and manage engineering notes and documentation. - -See [commands/note.md](commands/note.md) for full documentation. - -## Installation - -```bash -/plugin install doc@ai-helpers -``` - diff --git a/plugins/doc/commands/note.md b/plugins/doc/commands/note.md deleted file mode 100644 index 7dc3547a5..000000000 --- a/plugins/doc/commands/note.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -description: Generate professional engineering notes and append them to a log file -argument-hint: "[task description]" ---- - -## Name -doc:note - -## Synopsis -``` -/doc:note [task description] -``` - -## Description -The `doc:note` command generates professional engineering notes about completed tasks and appends them to a persistent log file (`~/engineering-notes.md`). It automatically searches for relevant context including GitHub PR URLs, issue numbers, and Jira ticket references in the conversation history and git repository. - -This command helps engineers maintain a structured record of their daily work, making it easier to: -- Track accomplishments for performance reviews -- Generate status reports and weekly updates -- Maintain a searchable history of technical decisions -- Document completed work with proper attribution - -## Implementation -The command performs the following steps: -1. 
**Context gathering**: Searches conversation history for GitHub PR URLs, issue numbers, or Jira ticket keys (e.g., PROJ-123) -2. **Git analysis**: If in a git repository, checks recent commits and current branch name for references -3. **Note generation**: Creates a 1-2 sentence note with: - - Today's date in YYYY-MM-DD format - - Accomplishment framed in past tense - - Technical details and specific technologies used - - Impact and value delivered - - All relevant links inline -4. **File management**: Appends the note to `~/engineering-notes.md` (creates file if it doesn't exist) with proper spacing - -If the task description argument is omitted, the command will attempt to discover a task description from relevant context (e.g. git repository status and conversation history). If no relevant context is discovered, or if more information is needed, the command will prompt for further context. - -## Return Value -- **Success**: Confirmation message with the generated note -- **File created**: `~/engineering-notes.md` (if it didn't exist) -- **File updated**: Note appended with blank line separator - -## Examples - -1. **Basic usage with task description**: - ``` - /doc:note Implemented user authentication with OAuth2 - ``` - Generates: - ``` - 2025-10-24 - Implemented user authentication using OAuth2. Integrated with Google and GitHub providers, added JWT token management, and secured API endpoints with role-based access control. - - ``` - -2. **Without task description (auto-discovers from context)**: - ``` - /doc:note - ``` - The command analyzes git repository and conversation history to generate a note. If insufficient context is available, it will prompt for details. - -3. **With git context**: - ``` - /doc:note Fixed critical bug in payment processor - ``` - If on a branch named `fix/payment-timeout` with recent commits, generates: - ``` - 2025-10-24 - Fixed critical timeout bug in payment processor (PR #123). 
Optimized database queries and added connection pooling, reducing payment processing time by 60% and eliminating timeout errors. - - ``` - -## Arguments -- `[task description]`: Optional description of the completed task. If omitted, the command attempts to discover context automatically. diff --git a/plugins/gwapi/.claude-plugin/plugin.json b/plugins/gwapi/.claude-plugin/plugin.json deleted file mode 100644 index 39e7c34fd..000000000 --- a/plugins/gwapi/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "gwapi", - "description": "Gateway API management for Kubernetes/OpenShift clusters", - "version": "0.0.1", - "author": { - "name": "github.com/openshift-eng" - } -} diff --git a/plugins/gwapi/README.md b/plugins/gwapi/README.md deleted file mode 100644 index 218696e5e..000000000 --- a/plugins/gwapi/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# Gateway API Plugin - -Install and configure Gateway API resources on Kubernetes and OpenShift clusters. - -## Overview - -This Gateway API plugin provides utilities for installing Gateway API resources with automatic cluster configuration. It simplifies the deployment of GatewayClass and Gateway resources by applying the appropriate configuration. - -## Commands - -### `/gwapi:install` - -Install Gateway API resources to a Kubernetes/OpenShift cluster. - -See [commands/install.md](commands/install.md) for complete documentation. - -### `/gwapi:check` - -Check the installed Gateway API resources in the connected cluster. - -See [commands/check.md](commands/check.md) for complete documentation. - -### `/gwapi:delete` - -Delete Gateway API resources in the Kubernetes/OpenShift cluster. - -See [commands/delete.md](commands/delete.md) for complete documentation. 
- -**Synopsis:** -```bash -/gwapi:install [namespace] -/gwapi:check [namespace] -/gwapi:delete [namespace] -``` - -**Features:** -- Automatically detects cluster ingress domain -- Installs GatewayClass and Gateway resources -- Supports both OpenShift (`oc`) and Kubernetes (`kubectl`) -- Optional namespace targeting -- Check installed Gateway API resources -- Delete Gateway API resources -- Idempotent installation (safe to run multiple times) - -## Installation - -```bash -/plugin install gwapi@ai-helpers -``` - -## Prerequisites - -- Either `oc` (OpenShift CLI) or `kubectl` (Kubernetes CLI) must be installed -- Active connection to a Kubernetes or OpenShift cluster -- Appropriate permissions to create cluster-scoped resources (GatewayClass) and namespaced resources (Gateway) - -## Resources Installed - -The plugin installs, checks and deletes the following Gateway API resources: - -1. **GatewayClass** (`openshift-default`) - - Controller: `openshift.io/gateway-controller/v1` - - Cluster-scoped resource defining the gateway implementation - -2. **Gateway** (`gateway`) - - Namespace: `openshift-ingress` (default) - - Hostname pattern: `*.gwapi.${DOMAIN}` (automatically configured) - - Listener on port 80 (HTTP) - - Allows routes from all namespaces - -## How It Works - -1. Detects available CLI tool (`oc` or `kubectl`) -2. Verifies cluster connectivity -3. Retrieves cluster ingress domain (OpenShift) or prompts for manual input (Kubernetes) -4. Applies GatewayClass resource -5. Substitutes cluster domain into Gateway resource and applies it -6. Verifies installation success -7. Checks the installed and other related Gateway API resources -8. 
Deletes all related resources after prompting the user - -## Notes - -- The Gateway resource uses `${DOMAIN}` as a placeholder that gets replaced with your cluster's actual ingress domain -- Resources are applied idempotently - you can run the command multiple times safely -- Original YAML files are not modified; domain substitution happens in-memory during application -- Deleting the Gateway API resources provides warnings and disclaimers diff --git a/plugins/gwapi/commands/check.md b/plugins/gwapi/commands/check.md deleted file mode 100644 index 6ac21055f..000000000 --- a/plugins/gwapi/commands/check.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -description: Check Gateway API resources status in the cluster -argument-hint: "[namespace]" ---- - -## Name -gwapi:check - -## Synopsis -```bash -/gwapi:check [namespace] -``` - -## Description -The `gwapi:check` command verifies the status of Gateway API resources in a Kubernetes or OpenShift cluster. It checks: -1. Presence and status of GatewayClass resources -2. Presence and status of Gateway resources -3. Gateway listener configuration and readiness -4. Gateway addresses and connectivity - -This command helps troubleshoot Gateway API deployments and verify successful installation. - -## Arguments -- `$1` (optional): Target namespace to check for Gateway resources. If not specified, checks all namespaces for GatewayClass (cluster-scoped) and Gateway resources. - -## Implementation - -1. **Tool Detection** - - Check if `oc` is available: `which oc` - - If not available, check for `kubectl`: `which kubectl` - - If neither is available, inform the user to install one of these tools: - - OpenShift CLI: - - Kubernetes CLI: - -2. **Cluster Connection Verification** - - Verify cluster connectivity: `oc whoami` or `kubectl cluster-info` - - If connection fails, inform the user to authenticate to their cluster: - - For OpenShift: `oc login ` - - For Kubernetes: Configure kubeconfig properly - -3. 
**Check GatewayClass Resources** - - List all GatewayClass resources: `oc get gatewayclass` or `kubectl get gatewayclass` - - For each GatewayClass found: - - Display name, controller, and ACCEPTED status - - Get detailed status: `oc get gatewayclass -o yaml` - - Check the `status.conditions` for any errors or warnings - - If no GatewayClass found: - - Display: "No GatewayClass resources found. You may need to install Gateway API CRDs or run /gwapi:install" - -4. **Check Gateway Resources** - - If namespace argument provided: - - Check Gateway resources in specified namespace: `oc get gateway -n ` - - If no namespace argument: - - Check all namespaces: `oc get gateway --all-namespaces` - - For each Gateway found: - - Display name, namespace, class, and PROGRAMMED status - - Get detailed information: `oc get gateway -n -o yaml` - - Extract and display: - - Gateway addresses (LoadBalancer IPs/hostnames) - - Listener configurations (hostnames, ports, protocols) - - Listener status and attached routes count - - Check the `status.conditions` for any errors or warnings - - If no Gateway found: - - Display: "No Gateway resources found in [namespace/cluster]" - -5. **Status Summary** - - Create a summary report with: - - Total GatewayClass count and their statuses - - Total Gateway count per namespace - - Number of ready vs not-ready Gateways - - Any errors or warnings found - -6. **Connectivity Check (Optional)** - - For each Gateway with an address: - - Display the address (LoadBalancer hostname/IP) - - Suggest testing connectivity: `curl -v http://` - - Note: Actual connectivity testing is optional and should be suggested rather than automatically performed - -7. **Error Handling** - - If API resources not found: - - Display: "Gateway API CRDs not installed. Install them using /gwapi:install or manually install Gateway API CRDs" - - If access denied: - - Display: "Insufficient permissions. 
GatewayClass requires cluster-scoped read access, Gateway requires namespace read access" - - If cluster unreachable: - - Display connection error and suggest checking cluster status - -## Return Value -- **Success**: Status report showing all Gateway API resources and their health -- **No Resources**: Information that no Gateway API resources were found with suggestion to run /gwapi:install -- **Error**: Error message with troubleshooting steps - -## Examples - -1. **Check all Gateway API resources**: - ```bash - /gwapi:check - ``` - Displays status of all GatewayClass and Gateway resources across the cluster. - -2. **Check Gateway resources in specific namespace**: - ```bash - /gwapi:check openshift-ingress - ``` - Shows Gateway resources only in the `openshift-ingress` namespace, plus all cluster-scoped GatewayClass resources. - -## Output Format - -The command should produce output similar to: - -```text -Gateway API Status Check -======================== - -GatewayClass Resources: ------------------------ -NAME CONTROLLER ACCEPTED AGE -openshift-default openshift.io/gateway-controller/v1 True 2h - -Gateway Resources: ------------------- -NAMESPACE NAME CLASS PROGRAMMED AGE -openshift-ingress gateway openshift-default True 1h - -Gateway Details: gateway (openshift-ingress) ---------------------------------------------- -Address: a0a658ac4b2d447fa83d2f247a0dc714-1135029665.us-west-1.elb.amazonaws.com -Listeners: - - Name: demo - Hostname: *.gwapi.apps.ci-ln-42q9hck-76ef8.aws-4.ci.openshift.org - Port: 80 - Protocol: HTTP - Status: Ready - Attached Routes: 3 - -Summary: --------- -✓ 1 GatewayClass (1 accepted) -✓ 1 Gateway (1 programmed) -✓ All resources healthy -``` - -## Notes -- GatewayClass is cluster-scoped, so it's always checked regardless of namespace argument -- Gateway is namespace-scoped, filtered by namespace argument if provided -- The command is read-only and makes no modifications to the cluster -- Useful for verifying successful installation after 
running /gwapi:install -- Can be run repeatedly to monitor Gateway API resource health diff --git a/plugins/gwapi/commands/delete.md b/plugins/gwapi/commands/delete.md deleted file mode 100644 index a4f6a7f17..000000000 --- a/plugins/gwapi/commands/delete.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -description: Delete Gateway API resources from a Kubernetes/OpenShift cluster -argument-hint: "[namespace]" ---- - -## Name -gwapi:delete - -## Synopsis -```bash -/gwapi:delete [namespace] -``` - -## Description -The `gwapi:delete` command removes Gateway API resources from a Kubernetes or OpenShift cluster. It deletes: -1. Gateway resources (namespace-scoped) -2. GatewayClass resources (cluster-scoped) - -The command uses `oc` (preferred) or `kubectl` to delete the resources safely. It provides confirmation before deletion and verifies successful removal. - -## Arguments -- `$1` (optional): Target namespace for deleting Gateway resources. If not specified, deletes Gateway resources from the `openshift-ingress` namespace (as defined in the YAML files) and the cluster-scoped GatewayClass. - -## Implementation - -1. **Tool Detection** - - Check if `oc` is available: `which oc` - - If not available, check for `kubectl`: `which kubectl` - - If neither is available, inform the user to install one of these tools: - - OpenShift CLI: - - Kubernetes CLI: - -2. **Cluster Connection Verification** - - Verify cluster connectivity: `oc whoami` or `kubectl cluster-info` - - If connection fails, inform the user to authenticate to their cluster: - - For OpenShift: `oc login ` - - For Kubernetes: Configure kubeconfig properly - -3. **Resource Discovery** - - Check for existing Gateway resources: - - If namespace argument provided: `oc get gateway -n ` - - If no namespace argument: `oc get gateway --all-namespaces` - - Check for existing GatewayClass resources: `oc get gatewayclass` - - If no resources found: - - Display: "No Gateway API resources found to delete" - - Exit successfully - -4. 
**Display Resources to be Deleted** - - Show a clear list of resources that will be deleted: - ```text - The following resources will be deleted: - - GatewayClass: - - openshift-default - - Gateway (openshift-ingress): - - gateway - ``` - -5. **User Confirmation** - - Ask for confirmation before proceeding with deletion - - Use AskUserQuestion tool to confirm: - - Question: "Are you sure you want to delete these Gateway API resources?" - - Options: - - "Yes, delete all resources" - - "No, cancel deletion" - - If user selects "No" or cancels, exit without making changes - -6. **Delete Gateway Resources** - - If namespace argument provided: - - Delete Gateway resources from specified namespace - - For each Gateway found: `oc delete gateway -n ` - - If no namespace argument: - - Delete the specific Gateway from the YAML: `oc delete -f plugins/gwapi/resources/gateway.yaml --ignore-not-found` - - Alternative: Delete by name if known: `oc delete gateway gateway -n openshift-ingress --ignore-not-found` - - Display deletion status for each Gateway - - Use `--ignore-not-found` flag to handle already-deleted resources gracefully - -7. **Delete GatewayClass Resources** - - Delete the GatewayClass resource: `oc delete -f plugins/gwapi/resources/gatewayclass.yaml --ignore-not-found` - - Alternative: Delete by name: `oc delete gatewayclass openshift-default --ignore-not-found` - - Display deletion status - - Note: GatewayClass is cluster-scoped, so namespace argument doesn't apply - -8. **Deletion Verification** - - Verify Gateway resources are deleted: - - If namespace was specified: `oc get gateway -n ` - - Otherwise: `oc get gateway --all-namespaces` - - Verify GatewayClass is deleted: `oc get gatewayclass` - - If resources still exist, display warning with resource names - - If all resources are deleted, display success confirmation - -9. **Error Handling** - - If deletion fails due to permissions: - - Display: "Insufficient permissions. 
Deleting GatewayClass requires cluster-admin privileges, Gateway requires namespace delete permissions" - - If resources are in use (have attached routes): - - Display warning about attached routes - - Show number of attached routes per Gateway - - Confirm user still wants to proceed - - If deletion partially fails: - - Display which resources were successfully deleted - - Display which resources failed with error messages - - Provide troubleshooting steps for failed deletions - -10. **Cleanup Summary** - - Display a summary of deletion results: - - Number of Gateways deleted - - Number of GatewayClasses deleted - - Any errors or warnings encountered - -## Return Value -- **Success**: Confirmation message listing all deleted resources -- **No Resources**: Information that no Gateway API resources were found -- **Partial Success**: List of successfully deleted and failed resources -- **Cancelled**: Message that deletion was cancelled by user -- **Failure**: Error message with troubleshooting steps - -## Examples - -1. **Delete all Gateway API resources**: - ```bash - /gwapi:delete - ``` - Prompts for confirmation, then deletes Gateway from `openshift-ingress` namespace and the GatewayClass. - -2. **Delete Gateway from specific namespace**: - ```bash - /gwapi:delete gateway-system - ``` - Deletes Gateway resources only from the `gateway-system` namespace and the cluster-scoped GatewayClass (after confirmation). - -## Notes -- **Destructive Operation**: This command permanently deletes resources. Always confirm before proceeding. 
-- **Attached Routes**: If HTTPRoute or other route resources reference the Gateway, they may become non-functional after deletion -- **Cluster-Scoped**: GatewayClass deletion requires cluster-admin or equivalent permissions -- **Idempotent**: Safe to run multiple times - uses `--ignore-not-found` flag -- **No Cascade**: Deleting GatewayClass does not automatically delete associated Gateways -- **Service Impact**: Deleting Gateway resources will stop routing traffic through the Gateway -- **Confirmation Required**: User must explicitly confirm deletion to prevent accidental resource removal -- **Resource Files**: The original YAML files in `plugins/gwapi/resources/` are not modified or deleted - -## Safety Features -- Requires explicit user confirmation before deletion -- Displays all resources to be deleted before proceeding -- Uses `--ignore-not-found` to handle already-deleted resources -- Provides clear error messages for troubleshooting -- Verifies deletion was successful -- Warns about attached routes that may be impacted diff --git a/plugins/gwapi/commands/install.md b/plugins/gwapi/commands/install.md deleted file mode 100644 index d8494031c..000000000 --- a/plugins/gwapi/commands/install.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -description: Install Gateway API resources to a Kubernetes/OpenShift cluster -argument-hint: "[namespace]" ---- - -## Name -gwapi:install - -## Synopsis -```bash -/gwapi:install [namespace] -``` - -## Description -The `gwapi:install` command applies Gateway API YAML resources to a Kubernetes or OpenShift cluster. It installs: -1. `gatewayclass.yaml` - Defines the GatewayClass resource -2. `gateway.yaml` - Defines the Gateway resource with cluster-specific domain configuration - -The command automatically retrieves the cluster's ingress domain and substitutes it into the gateway.yaml before applying. It uses `oc` (preferred) or `kubectl` to install the resources. 
- -**The command waits for all resources to reach a successful status before completing** (up to 5 minutes timeout). This ensures that the Gateway API resources are fully reconciled and ready for use. - -## Arguments -- `$1` (optional): Target namespace for installing Gateway API resources. If not specified, uses the namespace defined in the YAML files or the current namespace context. - -## Implementation - -1. **Tool Detection** - - Check if `oc` is available: `which oc` - - If not available, check for `kubectl`: `which kubectl` - - If neither is available, inform the user to install one of these tools: - - OpenShift CLI: - - Kubernetes CLI: - -2. **Cluster Connection Verification** - - Verify cluster connectivity: `oc whoami` or `kubectl cluster-info` - - If connection fails, inform the user to authenticate to their cluster: - - For OpenShift: `oc login ` - - For Kubernetes: Configure kubeconfig properly - -3. **Retrieve Cluster Domain** - - Get the cluster's ingress domain: `DOMAIN=$(oc get ingresses.config/cluster -o jsonpath={.spec.domain})` - - If this fails (e.g., on non-OpenShift clusters), ask the user to provide the domain manually - - Verify domain is not empty: `echo $DOMAIN` - -4. **Namespace Handling** - - If namespace argument is provided: - - Check if namespace exists: `oc get namespace ` or `kubectl get namespace ` - - If it doesn't exist, create it: `oc create namespace ` or `kubectl create namespace ` - - Set context to use this namespace for subsequent commands - -5. **Install GatewayClass** - - Locate `plugins/gwapi/resources/gatewayclass.yaml` - - Display: "Installing GatewayClass..." - - Apply the resource: `oc apply -f plugins/gwapi/resources/gatewayclass.yaml` or `kubectl apply -f plugins/gwapi/resources/gatewayclass.yaml` - - Note: GatewayClass is cluster-scoped, so it does not require a namespace flag - - Capture and display any errors or warnings - -6. 
**Install Gateway with Domain Substitution** - - Locate `plugins/gwapi/resources/gateway.yaml` - - Display: "Installing Gateway with domain: $DOMAIN" - - Export the domain as an environment variable: `export DOMAIN=""` - - Substitute the domain in the YAML file using envsubst: `envsubst < plugins/gwapi/resources/gateway.yaml | oc apply -f -` - - If namespace argument was provided, add `-n ` flag - - Capture and display any errors or warnings - -7. **Wait for Resources to be Ready** - - Set timeout to 300 seconds (5 minutes) - - Poll every 5 seconds until resources are ready or timeout is reached - - **GatewayClass readiness check:** - - Get GatewayClass name from applied resource (e.g., `openshift-default`) - - Check ACCEPTED condition: `oc get gatewayclass -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}'` - - GatewayClass is ready when: ACCEPTED condition status is `True` - - Display progress: "Waiting for GatewayClass to be accepted... (attempt X/60)" - - **Gateway readiness check:** - - Determine namespace where Gateway was created (from YAML or argument) - - Get Gateway name from applied resource (e.g., `gateway`) - - Check PROGRAMMED condition: `oc get gateway -n -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}'` - - Check ACCEPTED condition: `oc get gateway -n -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}'` - - Gateway is ready when: PROGRAMMED condition is `True` AND ACCEPTED condition is `True` - - Display progress: "Waiting for Gateway to be programmed... (attempt X/60)" - - **Polling implementation:** - ```bash - TIMEOUT=300 - INTERVAL=5 - ELAPSED=0 - - # Wait for GatewayClass - while [ $ELAPSED -lt $TIMEOUT ]; do - ACCEPTED=$(oc get gatewayclass -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' 2>/dev/null) - if [ "$ACCEPTED" = "True" ]; then - echo "✓ GatewayClass is accepted" - break - fi - echo "Waiting for GatewayClass to be accepted... 
($(($ELAPSED))s / ${TIMEOUT}s)" - sleep $INTERVAL - ELAPSED=$(($ELAPSED + $INTERVAL)) - done - - # Wait for Gateway - ELAPSED=0 - while [ $ELAPSED -lt $TIMEOUT ]; do - PROGRAMMED=$(oc get gateway -n -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null) - ACCEPTED=$(oc get gateway -n -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' 2>/dev/null) - if [ "$PROGRAMMED" = "True" ] && [ "$ACCEPTED" = "True" ]; then - echo "✓ Gateway is ready" - break - fi - echo "Waiting for Gateway to be ready... ($(($ELAPSED))s / ${TIMEOUT}s)" - sleep $INTERVAL - ELAPSED=$(($ELAPSED + $INTERVAL)) - done - ``` - - **Timeout handling:** - - If timeout is reached before resources are ready: - - Display current status of resources with detailed condition information - - Show any error messages from status conditions - - Command should exit with an error status - - Display: "Timeout waiting for resources to be ready. Current status:" - - Display full resource status: `oc get gatewayclass -o yaml` and `oc get gateway -n -o yaml` - -8. **Final Verification and Summary** - - Once all resources are ready (or timeout occurred), display final summary: - - Check GatewayClass: `oc get gatewayclass` or `kubectl get gatewayclass` - - Check Gateway: `oc get gateway -A` or `kubectl get gateway -A` - - Display complete installation status with resource names, namespaces, and conditions - - If all resources are ready, display success message - - If timeout occurred, display error message with troubleshooting steps - -9. 
**Error Handling** - - If domain retrieval fails: - - Display the error and ask user to verify they're connected to an OpenShift cluster - - Suggest manual domain input - - If any YAML application fails: - - Display the error message - - Continue with remaining resources (don't fail fast) - - Provide summary of successful and failed resources at the end - - If resources don't become ready within timeout: - - Display current state of resources with full YAML output - - Show condition details and error messages - - Exit with error status - - Suggest troubleshooting steps (check controller logs, verify prerequisites) - -## Return Value -- **Success**: All resources are installed and ready - - GatewayClass ACCEPTED condition is `True` - - Gateway PROGRAMMED and ACCEPTED conditions are `True` - - Confirmation message with resource names, namespaces, and ready status -- **Timeout**: Resources were created but didn't become ready within 5 minutes - - Display current status of all resources - - Show condition details and any error messages - - Exit with error status -- **Failure**: Resources failed to apply - - Error message with details about what failed - - Troubleshooting steps - -## Examples - -1. **Install to default namespace**: - ```bash - /gwapi:install - ``` - Installs `gatewayclass.yaml` and `gateway.yaml` with the cluster's ingress domain automatically configured, then waits for resources to be ready. - - Example output: - ``` - Installing GatewayClass... - gatewayclass.gateway.networking.k8s.io/openshift-default created - Installing Gateway with domain: apps.example.com - gateway.gateway.networking.k8s.io/gateway created - Waiting for GatewayClass to be accepted... (0s / 300s) - Waiting for GatewayClass to be accepted... (5s / 300s) - ✓ GatewayClass is accepted - Waiting for Gateway to be ready... (0s / 300s) - Waiting for Gateway to be ready... (5s / 300s) - ✓ Gateway is ready - - Installation complete! All resources are ready. - ``` - -2. 
**Install to specific namespace**: - ```bash - /gwapi:install gateway-system - ``` - Installs both resources to the `gateway-system` namespace with domain substitution, then waits for resources to be ready. - -## Notes -- YAML files should be placed in `plugins/gwapi/resources/` directory: - - `gatewayclass.yaml` - GatewayClass definition - - `gateway.yaml` - Gateway definition with `${DOMAIN}` placeholder -- The `gateway.yaml` file should use `${DOMAIN}` as a placeholder for the cluster's ingress domain -- Domain is automatically retrieved from OpenShift cluster: `oc get ingresses.config/cluster -o jsonpath={.spec.domain}` -- Domain substitution is performed using `envsubst` which replaces `${DOMAIN}` with the actual cluster domain -- Resources are applied with `oc apply` which is idempotent - safe to run multiple times -- The command does not modify existing resources unless YAML content has changed -- The original YAML files are not modified; domain substitution happens in-memory during application -- **Waiting behavior:** - - Default timeout: 300 seconds (5 minutes) - - Poll interval: 5 seconds - - GatewayClass is considered ready when ACCEPTED condition is `True` - - Gateway is considered ready when both PROGRAMMED and ACCEPTED conditions are `True` - - If timeout is reached, the command exits with an error and displays the current resource status -- The command blocks until all resources are ready or timeout occurs -- Progress updates are displayed every 5 seconds during the wait diff --git a/plugins/gwapi/resources/gateway.yaml b/plugins/gwapi/resources/gateway.yaml deleted file mode 100644 index 11d795822..000000000 --- a/plugins/gwapi/resources/gateway.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: Gateway -metadata: - name: gateway - namespace: openshift-ingress -spec: - gatewayClassName: openshift-default - listeners: - - name: demo - hostname: "*.gwapi.${DOMAIN}" - port: 80 - protocol: HTTP - allowedRoutes: - 
namespaces: - from: All diff --git a/plugins/gwapi/resources/gatewayclass.yaml b/plugins/gwapi/resources/gatewayclass.yaml deleted file mode 100644 index c18472477..000000000 --- a/plugins/gwapi/resources/gatewayclass.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: GatewayClass -metadata: - name: openshift-default -spec: - controllerName: openshift.io/gateway-controller/v1 diff --git a/plugins/lvms/.claude-plugin/plugin.json b/plugins/lvms/.claude-plugin/plugin.json deleted file mode 100644 index fd3714e89..000000000 --- a/plugins/lvms/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "lvms", - "description": "LVMS (Logical Volume Manager Storage) plugin for troubleshooting and debugging storage issues", - "version": "0.1.1", - "author": { - "name": "github.com/openshift-eng" - } -} diff --git a/plugins/lvms/README.md b/plugins/lvms/README.md deleted file mode 100644 index 1c3303e4f..000000000 --- a/plugins/lvms/README.md +++ /dev/null @@ -1,251 +0,0 @@ -# LVMS Plugin - -Comprehensive troubleshooting and debugging plugin for LVMS (Logical Volume Manager Storage). - -## Overview - -The LVMS plugin provides powerful commands for diagnosing and troubleshooting storage issues in OpenShift clusters using LVMS. It analyzes LVMCluster resources, volume groups, PVCs, TopoLVM CSI driver, and node-level storage configuration to identify root causes of storage failures. - -## Commands - -### `/lvms:analyze` - -Comprehensive LVMS troubleshooting that analyzes cluster health, storage resources, and identifies common issues. 
- -**Works with:** -- Live OpenShift clusters (via `oc` CLI) -- LVMS must-gather data (offline analysis) - -**Features:** -- LVMCluster health and readiness analysis -- Volume group status across all nodes -- PVC/PV binding issues and pending volumes -- LVMS operator and TopoLVM CSI driver health -- Node-level device availability and configuration (live clusters) -- Thin pool capacity and usage -- Pod log analysis with error deduplication -- Root cause analysis with specific remediation steps - -**Usage Examples:** - -```bash -# Analyze live cluster -/lvms:analyze --live - -# Analyze must-gather data -/lvms:analyze ./must-gather/registry-ci-openshift-org-origin-4-18.../ - -# Focus on specific component -/lvms:analyze --live --component storage -/lvms:analyze ./must-gather/... check pending PVCs - -# Analyze pod logs only -/lvms:analyze --live --component logs -/lvms:analyze ./must-gather/... --component logs -``` - -## Common Use Cases - -### 1. PVCs Stuck in Pending State - -When PVCs using LVMS storage classes are not binding: - -```bash -/lvms:analyze --live check pending PVCs -``` - -The command will: -- Identify which PVCs are pending -- Check volume group free space -- Verify TopoLVM CSI driver is running -- Check for node affinity issues -- Provide specific remediation steps - -### 2. LVMCluster Not Ready - -When LVMCluster resource is not reaching Ready state: - -```bash -/lvms:analyze --live analyze operator -``` - -The command will: -- Check LVMCluster status and conditions -- Identify which nodes have volume group issues -- Verify device availability and configuration -- Check for conflicting filesystems on devices -- Provide steps to clean devices and recreate VGs - -### 3. 
Volume Group Creation Failures - -When volume groups are not being created on nodes: - -```bash -/lvms:analyze --live --component volumes -``` - -The command will: -- Show volume group status per node -- Identify missing or failed volume groups -- Check device selector configuration -- Detect devices already in use -- Provide commands to wipe devices and retry - -### 4. Must-Gather Analysis - -When analyzing a must-gather from a failed cluster: - -```bash -/lvms:analyze ./must-gather/path/ -``` - -The command will: -- Perform offline analysis of all LVMS resources -- Generate comprehensive health report -- Identify critical issues and warnings -- Provide prioritized remediation recommendations -- Suggest which logs to review - -## Installation - -### From Marketplace - -```bash -# Add the marketplace -/plugin marketplace add openshift-eng/ai-helpers - -# Install LVMS plugin -/plugin install lvms@ai-helpers - -# Use the command -/lvms:analyze --live -``` - -### Manual Installation - -```bash -# Clone the repository -git clone https://github.com/openshift-eng/ai-helpers.git - -# Link to Claude Code plugins directory -ln -s $(pwd)/ai-helpers/plugins/lvms ~/.claude/plugins/lvms -``` - -## Prerequisites - -**For Live Cluster Analysis:** -- `oc` CLI installed and configured -- Active cluster connection -- Read access to `openshift-lvm-storage` or older `openshift-storage` namespace -- Ability to read cluster-scoped resources - -**For Must-Gather Analysis:** -- Python 3.6+ (for analysis script) -- PyYAML library: `pip install pyyaml` - -## What the Plugin Checks - -### LVMCluster Resources -- Overall state (Ready, Progressing, Failed, Degraded) -- Status conditions (ResourcesAvailable, VolumeGroupsReady) -- Device class configurations -- Node coverage and readiness - -### Volume Groups -- Volume group creation status per node -- Physical volume availability -- Free space and capacity -- Thin pool configuration and usage -- Missing or failed volume groups - -### Storage 
(PVCs/PVs) -- PVC binding status -- Pending volume provisioning failures -- Storage class configuration -- Capacity issues -- Node affinity constraints - -### Operator Health -- LVMS operator deployment status -- TopoLVM controller readiness -- TopoLVM node daemonset coverage -- VG-manager daemonset status -- Pod crashes and restarts - -### Node Devices -- Block device availability -- Existing filesystems on devices -- Device selector matches -- Disk capacity and usage - -### Pod Logs -- Error and warning messages from vg-manager pods -- Error and warning messages from lvms-operator pod -- Deduplication of repeated errors from reconciliation loops -- JSON log parsing with timestamps and context - -## Output Format - -The plugin provides structured, color-coded output: - -- ✓ Green checkmarks for healthy components -- ⚠ Yellow warnings for non-critical issues -- ❌ Red errors for critical problems -- ℹ Blue info for additional context - -Reports include: -- Component-by-component health status -- Root cause analysis -- Prioritized recommendations -- Specific remediation commands -- Links to relevant documentation - -## Troubleshooting the Plugin - -**Script not found:** -```bash -# Verify script exists -ls plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py - -# Make executable -chmod +x plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py -``` - -**Cannot connect to cluster:** -```bash -# Verify oc is configured -oc whoami -oc cluster-info - -# Check LVMS namespace -oc get namespace openshift-lvm-storage -``` - -**Must-gather path errors:** -```bash -# Use the correct subdirectory (the one with the hash) -ls must-gather/registry-ci-*/namespaces/openshift-lvm-storage - -# Not the parent directory -``` - -## Related Resources - -- [LVMS GitHub Repository](https://github.com/openshift/lvm-operator) -- [LVMS Troubleshooting Guide](https://github.com/openshift/lvm-operator/blob/main/docs/troubleshooting.md) -- [TopoLVM 
Documentation](https://github.com/topolvm/topolvm) -- [OpenShift Storage Documentation](https://docs.openshift.com/container-platform/latest/storage/index.html) - -## Contributing - -Contributions are welcome! Please see the main repository's [CLAUDE.md](../../CLAUDE.md) for guidelines on: -- Adding new commands -- Extending analysis capabilities -- Improving diagnostic checks -- Adding helper scripts - -## Support - -For issues or feature requests: -- GitHub Issues: https://github.com/openshift-eng/ai-helpers/issues -- Repository: https://github.com/openshift-eng/ai-helpers diff --git a/plugins/lvms/commands/analyze.md b/plugins/lvms/commands/analyze.md deleted file mode 100644 index 64a1bf144..000000000 --- a/plugins/lvms/commands/analyze.md +++ /dev/null @@ -1,755 +0,0 @@ ---- -description: Comprehensive LVMS troubleshooting - analyzes LVMCluster, volume groups, PVCs, and storage issues on live clusters or must-gather -argument-hint: "[must-gather-path|--live] [--component storage|operator|volumes]" ---- - -## Name -lvms:analyze - -## Synopsis -``` -/lvms:analyze [must-gather-path] [--live] [--component COMPONENT] -``` - -## Description - -The `lvms:analyze` command provides comprehensive troubleshooting for OpenShift LVMS (Logical Volume Manager Storage). It analyzes the health and configuration of LVMCluster, volume groups, PVCs, TopoLVM CSI driver, and node-level storage to identify and diagnose common LVMS issues. 
- -The command can operate in two modes: -- **Must-gather analysis**: Analyzes LVMS must-gather data offline -- **Live cluster analysis**: Connects to a running cluster and performs real-time diagnostics - -Common issues detected: -- PVCs stuck in Pending state -- LVMCluster not reaching Ready state -- Volume group creation failures -- Missing or unhealthy physical volumes -- TopoLVM CSI driver issues -- Node-level disk availability problems -- Thin pool configuration issues -- Device class misconfigurations -- Operator and vg-manager pod errors (from log analysis) - -## Prerequisites - -**For Live Cluster Analysis:** -- `oc` CLI installed and configured -- Active cluster connection: `oc whoami` -- Read access to LVMS namespace (`openshift-lvm-storage` or older `openshift-storage`) -- Ability to read cluster-scoped resources (CRDs, Nodes, PVs) - -**For Must-Gather Analysis:** -- LVMS must-gather data extracted to a directory -- Must-gather structure: - ``` - must-gather/ - └── registry-ci-openshift-org.../ - ├── cluster-scoped-resources/ - ├── namespaces/ - │ └── openshift-lvm-storage/ (or openshift-storage for older versions) - └── ... - ``` - -**Namespace Compatibility:** -- LVMS namespace changed from `openshift-storage` to `openshift-lvm-storage` in recent versions -- The command automatically detects which namespace is used in the must-gather -- Both namespaces are supported for backward compatibility - -**Analysis Script:** -- Python 3 script at: `plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py` -- If script is missing, command will use built-in analysis logic - -## Implementation - -The command performs the following steps: - -1. **Determine Analysis Mode**: - - If the `--live` flag is present, proceed with live cluster analysis - - If path argument is provided, proceed with must-gather analysis - - If neither provided, ask user which mode to use - - Validate prerequisites for selected mode - -2. 
**Validate Environment**: - - **For Live Cluster:** - ```bash - # Verify oc CLI - which oc - - # Verify cluster connection - oc whoami - - # Check LVMS namespace exists (try both namespaces) - oc get namespace openshift-lvm-storage 2>/dev/null || oc get namespace openshift-storage - ``` - - **For Must-Gather:** - ```bash - # Verify path exists (checks both old and new namespaces) - ls {must-gather-path}/namespaces/openshift-lvm-storage 2>/dev/null || \ - ls {must-gather-path}/namespaces/openshift-storage - - # Check for required directories - ls {must-gather-path}/cluster-scoped-resources/core/persistentvolumes - - # Note: The analysis script automatically detects which namespace is used - ``` - -3. **Determine Analysis Scope**: - - Check for component-specific keywords in arguments: - - If the argument contains one or more of `storage`, `pvc`, `pv`, `volumes`, `pending` then only do storage/pvc analysis. - - If the argument contains one or more of `operator`, `lvmcluster`, `deployment`, `pods` then analyze operator health only - - If the argument contains one or more of `vg`, `volume group`, `disk`, `device` then do Volume group analysis only - - If the argument contains one or more of `node`, `devices`, `lsblk` then do node-level device analysis only (live cluster only) - - If the argument contains one or more of `logs`, `errors` then do pod log analysis only - - If no specific component provided then do full comprehensive analysis - -4. 
**Collect LVMS Resources**: - - **Live Cluster Collection:** - - First, detect which namespace LVMS is using: - ```bash - # Detect LVMS namespace (newer versions use openshift-lvm-storage, older use openshift-storage) - LVMS_NS=$(oc get namespace openshift-lvm-storage -o name 2>/dev/null | cut -d/ -f2) - if [ -z "$LVMS_NS" ]; then - LVMS_NS="openshift-storage" - fi - ``` - - Then collect resources: - ```bash - # LVMCluster resources - oc get lvmcluster -n $LVMS_NS -o yaml - - # LVMVolumeGroup status - oc get lvmvolumegroup -A -o yaml - oc get lvmvolumegroupnodestatus -A -o yaml - - # Operator pods - oc get pods -n $LVMS_NS -o wide - oc get pods -n $LVMS_NS -o yaml - - # Storage resources - oc get pvc -A -o yaml | grep -A 50 "storageClassName: lvms-" - oc get pv -o yaml | grep -A 50 "storageClassName: lvms-" - - # Events in LVMS namespace - oc get events -n $LVMS_NS --sort-by='.lastTimestamp' - - # Storage classes - oc get storageclass | grep lvms - oc get storageclass -o yaml | grep -A 20 "provisioner: topolvm.io" - - # Node information - oc get nodes -o wide - - # TopoLVM CSI components - oc get daemonset -n $LVMS_NS - oc get deployment -n $LVMS_NS - ``` - - **Must-Gather Collection:** - Use Python script if available (automatically detects namespace): - ```bash - python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py {must-gather-path} - ``` - - The script automatically detects and uses the correct namespace (openshift-lvm-storage or openshift-storage). - - Or use built-in file reading: - ```bash - # Find LVMCluster resources - find {must-gather-path} -name "lvmclusters.yaml" - - # Find LVMVolumeGroup resources - find {must-gather-path} -name "lvmvolumegroups.yaml" - - # Find operator pods - cat {must-gather-path}/namespaces/openshift-lvm-storage/pods.yaml - - # Find events - cat {must-gather-path}/namespaces/openshift-lvm-storage/events.yaml - ``` - -5. 
**Analyze LVMCluster Health**: - - Check critical status fields: - ```yaml - # LVMCluster Status - status: - state: Ready | Progressing | Failed | Degraded | Unknown - ready: true | false - conditions: - - type: ResourcesAvailable - status: True | False - reason: ... - message: ... - - type: VolumeGroupsReady - status: True | False - reason: ... - message: ... - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - LVMCLUSTER STATUS - ═══════════════════════════════════════════════════════════ - - LVMCluster: lvmcluster-sample - State: Ready ✓ | Progressing ⚠ | Failed ❌ - Ready: true ✓ | false ❌ - - Conditions: - ✓ ResourcesAvailable: True (All resources deployed) - ❌ VolumeGroupsReady: False (Volume group vg1 not found on node worker-0) - - Device Classes: - - vg1: 3 nodes, thin pool enabled - Status: 2/3 nodes ready - Missing: worker-0 - - Issues Detected: - ❌ CRITICAL: Volume group not created on worker-0 - ⚠ WARNING: Thin pool size at 85% capacity - ``` - -6. 
**Analyze Volume Groups**: - - For each LVMVolumeGroup and LVMVolumeGroupNodeStatus: - ```bash - # Check VG status across nodes - oc get lvmvolumegroup -A -o json | jq -r '.items[] | { - name: .metadata.name, - namespace: .metadata.namespace, - status: .status - }' - - # Check node-level VG status - oc get lvmvolumegroupnodestatus -A -o json | jq -r '.items[] | { - node: .metadata.name, - vgs: .spec.volumeGroups, - status: .status - }' - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - VOLUME GROUP STATUS - ═══════════════════════════════════════════════════════════ - - Volume Group: vg1 - Nodes: 3 - - Node: master-0 - ✓ VG Created: vg1 - ✓ PV Count: 1 - ✓ Free Space: 450 GiB / 500 GiB - ✓ Thin Pool: lvm-thin-pool-0 (90% allocated, 75% used) - - Node: worker-0 - ❌ VG Status: Failed - ❌ Error: No available devices found - ℹ Devices: /dev/sdb (rejected: already in use) - - Issues Detected: - ❌ worker-0: Device /dev/sdb already has filesystem - ⚠ master-0: Thin pool nearing capacity - ``` - -7. **Analyze PVC/PV Issues**: - - Find problematic PVCs: - ```bash - # Find pending PVCs using LVMS - oc get pvc -A -o json | jq -r '.items[] | - select(.spec.storageClassName | startswith("lvms-")) | - select(.status.phase != "Bound") | - {namespace: .metadata.namespace, name: .metadata.name, - phase: .status.phase, storageClass: .spec.storageClassName}' - ``` - - For each problematic PVC: - ```bash - # Get PVC details - oc describe pvc {pvc-name} -n {namespace} - - # Check events - oc get events -n {namespace} --field-selector involvedObject.name={pvc-name} - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - STORAGE (PVC/PV) STATUS - ═══════════════════════════════════════════════════════════ - - Total PVCs using LVMS: 15 - Bound: 12 ✓ - Pending: 3 ❌ - - Pending PVCs: - - 1. 
test-app/data-volume - Status: Pending (10m) - Storage Class: lvms-vg1 - Requested: 100Gi - - Recent Events: - - 10m Warning ProvisioningFailed: - "failed to provision volume: no node has enough free space" - - Root Cause: - ❌ Insufficient free space in volume group vg1 - Current available: 45Gi across all nodes - Largest available on single node: 20Gi - - 2. database/postgres-data - Status: Pending (5m) - Storage Class: lvms-vg1 - Requested: 50Gi - - Recent Events: - - 5m Warning ProvisioningFailed: - "topology constraint not satisfied" - - Root Cause: - ⚠ PVC has node affinity requiring worker-0 - ❌ worker-0 has no functional volume group - ``` - -8. **Analyze Operator Health**: - - Check operator pods: - ```bash - # Get all pods in LVMS namespace - oc get pods -n $LVMS_NS -o json - - # Check for crashloops, errors, restarts - oc get pods -n $LVMS_NS -o json | jq -r '.items[] | - {name: .metadata.name, - phase: .status.phase, - ready: .status.conditions[] | select(.type=="Ready") | .status, - restarts: .status.containerStatuses[].restartCount}' - ``` - - Check deployments and daemonsets: - ```bash - oc get deployment -n $LVMS_NS -o wide - oc get daemonset -n $LVMS_NS -o wide - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - OPERATOR HEALTH - ═══════════════════════════════════════════════════════════ - - Deployments: - ✓ lvms-operator: 1/1 replicas ready - - DaemonSets: - ✓ vg-manager: 3/3 nodes ready - - Pod Issues: - - ❌ vg-manager-abc123 (worker-0) - Status: CrashLoopBackOff - Restarts: 15 - - Container Logs (last 20 lines): - Error: failed to create volume group: exit status 5 - Error: volume group "vg1" creation failed - - Root Cause: - Volume group vg1 not created on worker-0 due to device conflicts - ``` - -9. 
**Analyze Node Device Status**: - - For live clusters, check devices on nodes: - ```bash - # For each node, check available block devices - oc debug node/{node-name} -- chroot /host lsblk --paths --json -o NAME,ROTA,TYPE,SIZE,MODEL,FSTYPE,MOUNTPOINT - - # Check which devices are being used by LVMS - oc debug node/{node-name} -- chroot /host vgs -o vg_name,pv_name,vg_size,vg_free - oc debug node/{node-name} -- chroot /host pvs -o pv_name,vg_name,pv_size,pv_free,pv_used - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - NODE DEVICE STATUS - ═══════════════════════════════════════════════════════════ - - Node: worker-0 - - Block Devices: - ✓ /dev/sda: 100GB (system disk, mounted as /) - ⚠ /dev/sdb: 500GB (has ext4 filesystem, not available) - ✓ /dev/sdc: 500GB (available for LVMS) - - Current VG Configuration: - ❌ No volume groups found - - Issues: - ❌ Device /dev/sdb has existing filesystem (ext4) - ℹ Device /dev/sdc is available but not configured - - Recommendations: - 1. Wipe filesystem on /dev/sdb: wipefs -a /dev/sdb - 2. Update LVMCluster to use /dev/sdc - 3. Or remove /dev/sdb from LVMCluster deviceSelector - ``` - -10. 
**Check TopoLVM Configuration**: - - Verify operator installation: - ```bash - # Check operator pods - oc get pods -n $LVMS_NS -l app.kubernetes.io/component=lvms-operator - - # Check storage classes - oc get storageclass -o json | jq -r '.items[] | - select(.provisioner == "topolvm.io") | - {name: .metadata.name, - parameters: .parameters, - volumeBindingMode: .volumeBindingMode}' - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - TOPOLVM CSI DRIVER - ═══════════════════════════════════════════════════════════ - - Operator Deployment: - ✓ lvms-operator: Running - - Storage Classes: - ✓ lvms-vg1 - Provisioner: topolvm.io - Volume Binding: WaitForFirstConsumer - Device Class: vg1 - Filesystem: xfs - - Note: CSI driver is integrated into the LVMS operator and vg-manager components - ``` - -11. **Analyze Pod Logs**: - - Extract and analyze error/warning messages from pod logs: - - **Live Cluster:** - ```bash - # Get logs from vg-manager pods - for pod in $(oc get pods -n $LVMS_NS -l app.kubernetes.io/component=vg-manager -o name); do - oc logs -n $LVMS_NS $pod --tail=1000 - done - - # Get logs from lvms-operator pod - oc logs -n $LVMS_NS deployment/lvms-operator --tail=1000 - ``` - - **Must-Gather:** - ```bash - # Pod logs are located at: - # namespaces/{lvms-namespace}/pods/{pod-name}/{container}/{container}/logs/current.log - ``` - - **Processing:** - ```bash - # Parse JSON-formatted log entries - # Extract error and warning level messages - # Deduplicate repeated errors from reconciliation loops - ``` - - Report findings: - ``` - ═══════════════════════════════════════════════════════════ - POD LOGS ANALYSIS - ═══════════════════════════════════════════════════════════ - - Pod: vg-manager-abc123 - Unique errors/warnings: 2 - - ❌ 2025-10-28T10:47:28Z: Reconciler error - Controller: lvmvolumegroup - Error Details: - failed to create/extend volume group vg1: failed to extend volume group vg1: - WARNING: VG name vg0 is used 
by VGs ... - Cannot use /dev/dm-10: device has a signature - Command requires all devices to be found. - - Pod: lvms-operator-xyz456 - Unique errors/warnings: 1 - - ❌ 2025-10-28T10:52:48Z: failed to validate device class setup - Controller: lvmcluster - Error: VG vg1 on node Degraded is not in ready state - ``` - -12. **Generate Comprehensive Report**: - - Synthesize all findings: - ``` - ═══════════════════════════════════════════════════════════ - LVMS ANALYSIS SUMMARY - ═══════════════════════════════════════════════════════════ - - Analysis Mode: Live Cluster | Must-Gather - Cluster: {cluster-name} - LVMS Version: {version} - Analysis Time: {timestamp} - - ✓ HEALTHY: {count} - - LVMCluster in Ready state - - 12/15 PVCs successfully bound - - Operator pods running on 2/3 nodes - - ⚠ WARNINGS: {count} - - Thin pool at 85% capacity on master-0 - - vg-manager daemonset not ready on all nodes - - ❌ CRITICAL ISSUES: {count} - - Volume group vg1 not created on worker-0 - - 3 PVCs stuck in Pending state - - Device /dev/sdb on worker-0 has conflicting filesystem - - ═══════════════════════════════════════════════════════════ - ROOT CAUSE ANALYSIS - ═══════════════════════════════════════════════════════════ - - Primary Issue: Volume Group Creation Failure on worker-0 - - Chain of Impact: - 1. Device /dev/sdb on worker-0 has existing ext4 filesystem - 2. vg-manager cannot create volume group vg1 - 3. Volume group missing on worker-0 - 4. Storage provisioning not functional on worker-0 - 5. PVCs with node affinity to worker-0 stuck Pending - - ═══════════════════════════════════════════════════════════ - RECOMMENDATIONS (Prioritized) - ═══════════════════════════════════════════════════════════ - - CRITICAL (Fix Immediately): - - 1. Clean device on worker-0: - # Access the node - oc debug node/worker-0 - - # Wipe the filesystem - chroot /host wipefs -a /dev/sdb - - # Verify device is clean - chroot /host lsblk /dev/sdb - - 2. 
Restart vg-manager to recreate volume group: - oc delete pod -n openshift-lvm-storage -l app.kubernetes.io/component=vg-manager - - 3. Verify volume group created: - oc debug node/worker-0 -- chroot /host vgs - - 4. Restart vg-manager on worker-0: - oc delete pod -n openshift-lvm-storage -l app.kubernetes.io/component=vg-manager - - 5. Verify PVCs bind: - oc get pvc -A | grep Pending - - WARNINGS (Address Soon): - - 6. Expand thin pool or clean up unused volumes: - # List logical volumes by size - oc debug node/master-0 -- chroot /host lvs --units g - - # Consider expanding thin pool or removing old volumes - - ═══════════════════════════════════════════════════════════ - NEXT STEPS - ═══════════════════════════════════════════════════════════ - - 1. Review and execute recommendations above - 2. Monitor LVMS operator logs: - oc logs -n openshift-lvm-storage deployment/lvms-operator -f - 3. Check volume group status after fixes: - /lvms:analyze --live --component volumes - 4. If issues persist, collect must-gather: - oc adm must-gather --image=quay.io/lvms_dev/lvms-must-gather:latest - - ═══════════════════════════════════════════════════════════ - ADDITIONAL RESOURCES - ═══════════════════════════════════════════════════════════ - - - LVMS Documentation: - https://github.com/openshift/lvm-operator/tree/main/docs - - - Troubleshooting Guide: - https://github.com/openshift/lvm-operator/blob/main/docs/troubleshooting.md - - - TopoLVM Documentation: - https://github.com/topolvm/topolvm - - Logs to Review: - - /namespaces/openshift-lvm-storage/pods/lvms-operator-*/logs/manager/current.log - - /namespaces/openshift-lvm-storage/pods/vg-manager-*/logs/vg-manager/current.log - ``` - -13. 
**Component-Specific Analysis**: - - If user requests specific component: - - Run only relevant analysis sections - - Provide focused output for that component - - Skip irrelevant checks - -## Return Value - -The command outputs a comprehensive analysis report to stdout: - -**Format:** -- Structured sections for each component -- Visual indicators: ✓ (healthy), ⚠ (warning), ❌ (critical) -- Root cause analysis connecting related issues -- Prioritized recommendations with specific commands -- Links to relevant logs and documentation - -**Success States:** -- **All Healthy**: Summary of healthy state with key metrics -- **Warnings Found**: Issues identified with recommendations -- **Critical Issues**: Detailed diagnosis with step-by-step remediation - -## Examples - -1. **Analyze live cluster (full analysis)**: - ``` - /lvms:analyze --live - ``` - Connects to current cluster and runs comprehensive LVMS diagnostics. - -2. **Analyze must-gather data**: - ``` - /lvms:analyze ./must-gather/registry-ci-openshift-org-origin-4-18.../ - ``` - Analyzes LVMS must-gather data offline. - -3. **Check only PVC issues on live cluster**: - ``` - /lvms:analyze --live check pending PVCs - ``` - Runs focused analysis on storage/PVC issues only. - -4. **Analyze volume groups in must-gather**: - ``` - /lvms:analyze ./must-gather/... --component volumes - ``` - Analyzes only volume group status and configuration. - -5. **Debug operator health**: - ``` - /lvms:analyze --live analyze operator pods - ``` - Focuses on LVMS operator and TopoLVM component health. - -6. **Check specific node's storage**: - ``` - /lvms:analyze --live check devices on worker-0 - ``` - Analyzes block devices and volume groups on specific node. - -7. **Analyze pod logs only (must-gather)**: - ``` - /lvms:analyze ./must-gather/... --component logs - ``` - Extracts and analyzes error messages from vg-manager and lvms-operator pod logs. - -8. 
**Analyze pod logs on live cluster**: - ``` - /lvms:analyze --live --component logs - ``` - Retrieves and analyzes current pod logs from running cluster. - -## Notes - -- **Must-Gather Path**: Always use the subdirectory containing `cluster-scoped-resources/` and `namespaces/`, not the parent directory -- **Namespace Compatibility**: LVMS namespace changed from `openshift-storage` (older versions) to `openshift-lvm-storage` (newer versions). The command automatically detects and uses the correct namespace in both live clusters and must-gathers -- **Live Cluster Access**: Requires read permissions to LVMS namespace and cluster-scoped resources -- **Node Debugging**: For device-level analysis on live clusters, the command uses `oc debug node/...` which requires elevated privileges -- **Pod Log Analysis**: Available for both live clusters (via `oc logs`) and must-gather data. Parses JSON-formatted logs, extracts errors/warnings, and deduplicates repeated reconciliation errors -- **Python Script**: If `analyze_lvms.py` script is available, it will be used for must-gather analysis for better performance -- **Cross-Component Correlation**: The analysis attempts to correlate issues across components (e.g., missing VG → pod crash → PVC pending → pod log errors) -- **Actionable Output**: Focuses on root causes and specific remediation steps rather than raw data dumps -- **Safety**: All recommendations include verification steps; no destructive operations are performed automatically - -## Arguments - -- **$1** (must-gather-path): Optional. Path to LVMS must-gather directory. If provided without `--live`, assumes must-gather analysis mode. - - Example: `./must-gather/registry-ci-openshift-org-origin-4-18.../` - -- **--live**: Optional flag. Use live cluster analysis mode. Requires active `oc` connection. - - Example: `/lvms:analyze --live` - -- **--component**: Optional. 
Focus analysis on specific component: - - `storage` / `pvc` / `volumes`: PVC and PV analysis - - `operator` / `pods`: Operator health and pod status - - `vg` / `volume-group`: Volume group configuration - - `node` / `devices`: Node-level device analysis - - `logs`: Pod log analysis - - `all`: Full analysis (default) - -- **Additional text**: Natural language text describing what to focus on (parsed for component keywords) - - Example: `check why PVCs are pending` - - Example: `analyze volume group on worker-0` - -## Troubleshooting - -**Cannot connect to cluster:** -```bash -# Verify oc is configured -oc whoami -oc cluster-info - -# Check namespace exists (try both old and new namespaces) -oc get namespace openshift-lvm-storage 2>/dev/null || \ - oc get namespace openshift-storage -``` - -**Must-gather path not found:** -```bash -# Verify directory structure (checks both namespaces) -ls {must-gather-path}/namespaces/openshift-lvm-storage 2>/dev/null || \ - ls {must-gather-path}/namespaces/openshift-storage - -# Use the correct subdirectory -ls {must-gather-path}/*/namespaces/openshift-lvm-storage 2>/dev/null || \ - ls {must-gather-path}/*/namespaces/openshift-storage -``` - -**Permission denied for node debugging:** -```bash -# Check permissions -oc auth can-i debug node - -# May require cluster-admin or privileged SCC -``` - -**Python script not found:** -- Command will fall back to built-in analysis -- For better performance, ensure script exists at: - `plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py` - -## Related Commands - -- `/must-gather:analyze` - General cluster analysis -- `/olm:diagnose` - OLM and operator troubleshooting -- `/ci:analyze-prow-job-test-failure` - CI test failure analysis - -## Additional Resources - -- [LVMS GitHub Repository](https://github.com/openshift/lvm-operator) -- [LVMS Troubleshooting Guide](https://github.com/openshift/lvm-operator/blob/main/docs/troubleshooting.md) -- [TopoLVM 
Documentation](https://github.com/topolvm/topolvm) -- [OpenShift Storage Documentation](https://docs.openshift.com/container-platform/latest/storage/index.html) diff --git a/plugins/lvms/skills/lvms-analyzer/SKILL.md b/plugins/lvms/skills/lvms-analyzer/SKILL.md deleted file mode 100644 index a1149cc09..000000000 --- a/plugins/lvms/skills/lvms-analyzer/SKILL.md +++ /dev/null @@ -1,517 +0,0 @@ ---- -name: LVMS Analyzer -description: Analyzes LVMS must-gather data to diagnose storage issues ---- - -# LVMS Analyzer Skill - -This skill provides detailed guidance for analyzing LVMS (Logical Volume Manager Storage) must-gather data to identify and troubleshoot storage issues. - -## When to Use This Skill - -Use this skill when: -- Analyzing LVMS must-gather data offline -- Diagnosing PVCs stuck in Pending state -- Investigating LVMCluster readiness issues -- Troubleshooting volume group creation failures -- Debugging TopoLVM CSI driver problems -- Checking operator health in LVMS namespace - -This skill is automatically invoked by the `/lvms:analyze` command when working with must-gather data. 
- -## Prerequisites - -**Required:** -- LVMS must-gather directory extracted and accessible -- Must-gather contains LVMS namespace directory: - - `namespaces/openshift-lvm-storage/` (newer versions) - - OR `namespaces/openshift-storage/` (older versions) -- Python 3.6 or higher installed -- PyYAML library: `pip install pyyaml` - -**Namespace Compatibility:** -- LVMS namespace changed from `openshift-storage` to `openshift-lvm-storage` in recent versions -- The analysis script automatically detects which namespace is present -- Both namespaces are fully supported for backward compatibility - -**Must-Gather Structure:** -``` -must-gather/ -└── registry-{image-registry}-lvms-must-gather-{version}-sha256-{hash}/ - ├── cluster-scoped-resources/ - │ ├── core/ - │ │ └── persistentvolumes/ - │ │ └── pvc-*.yaml # Individual PV files - │ ├── storage.k8s.io/ - │ │ └── storageclasses/ - │ │ ├── lvms-vg1.yaml - │ │ └── lvms-vg1-immediate.yaml - │ └── security.openshift.io/ - │ └── securitycontextconstraints/ - │ └── lvms-vgmanager.yaml - ├── namespaces/ - │ └── openshift-lvm-storage/ # or openshift-storage for older versions - │ ├── oc_output/ # IMPORTANT: Primary location for LVMS resources - │ │ ├── lvmcluster.yaml # Full LVMCluster resource with status - │ │ ├── lvmcluster # Text output (oc describe) - │ │ ├── lvmvolumegroup # Text output - │ │ ├── lvmvolumegroupnodestatus # Text output - │ │ ├── logicalvolume # Text output - │ │ ├── pods # Text output (oc get pods) - │ │ └── events # Text output - │ ├── pods/ - │ │ ├── lvms-operator-{hash}/ - │ │ │ └── lvms-operator-{hash}.yaml - │ │ └── vg-manager-{hash}/ - │ │ └── vg-manager-{hash}.yaml - │ └── apps/ # May contain deployments/daemonsets - └── ... -``` - -**Key Note:** LVMS resources are primarily in the `oc_output/` directory, with `lvmcluster.yaml` being the most important file containing full cluster and node status. 
- -## Implementation Steps - -### Step 1: Validate Must-Gather Path - -Before running analysis, verify the must-gather directory structure: - -```bash -# Check if LVMS namespace directory exists (try both namespaces) -ls {must-gather-path}/namespaces/openshift-lvm-storage 2>/dev/null || \ - ls {must-gather-path}/namespaces/openshift-storage - -# Verify required resource directories -ls {must-gather-path}/cluster-scoped-resources/core/persistentvolumes -``` - -**Namespace Detection:** -The analysis script automatically detects which namespace is present: -- Newer LVMS versions use `openshift-lvm-storage` -- Older LVMS versions use `openshift-storage` -- The script will inform you which namespace was detected - -**Common Issue:** User provides parent directory instead of subdirectory -- Must-gather extracts to a directory like `must-gather.local.12345/` -- Inside is a subdirectory like `registry-ci-openshift-org-origin-4-18.../` -- Always use the **subdirectory** (the one with cluster-scoped-resources/ and namespaces/) - -**Handling:** -```bash -# If user provides parent directory, try to find the correct subdirectory -if [ ! -d "{path}/namespaces/openshift-lvm-storage" ] && \ - [ ! 
-d "{path}/namespaces/openshift-storage" ]; then - # Try to find either namespace - find {path} -type d \( -name "openshift-lvm-storage" -o -name "openshift-storage" \) -path "*/namespaces/*" - # Suggest the correct path to user -fi -``` - -### Step 2: Run Analysis Script - -Use the Python analysis script for structured analysis: - -```bash -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - {must-gather-path} -``` - -**Script Location:** -- Always use: `plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py` -- Use relative path from repository root -- Script is part of the LVMS plugin - -**Component-Specific Analysis:** - -For focused analysis on specific components: - -```bash -# Analyze only storage/PVC issues -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - {must-gather-path} --component storage - -# Analyze only operator health -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - {must-gather-path} --component operator - -# Analyze only volume groups -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - {must-gather-path} --component volumes - -# Analyze only pod logs -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - {must-gather-path} --component logs -``` - -### Step 3: Interpret Analysis Results - -The script provides structured output across several sections: - -**1. 
LVMCluster Status** - -Key fields to check: -- `state`: Should be "Ready" -- `ready`: Should be true -- `conditions`: All should have status "True" - - ResourcesAvailable: Resources deployed successfully - - VolumeGroupsReady: VGs created on all nodes - -Example healthy output: -``` -LVMCluster: lvmcluster-sample -✓ State: Ready -✓ Ready: true - -Conditions: -✓ ResourcesAvailable: True -✓ VolumeGroupsReady: True -``` - -Example unhealthy output (real case from must-gather): -``` -LVMCluster: my-lvmcluster -❌ State: Degraded -❌ Ready: false - -Conditions: -✓ ResourcesAvailable: True - Reason: ResourcesAvailable - Message: Reconciliation is complete and all the resources are available -❌ VolumeGroupsReady: False - Reason: VGsDegraded - Message: One or more VGs are degraded -``` - -**2. Volume Group Status** - -Checks volume group creation per node and device availability: - -Example output (real case from must-gather): -``` -Volume Group/Device Class: vg1 -Nodes: 3 - - Node: ocpnode1.ocpiopex.growipx.com - ⚠ Status: Progressing - - Devices: /dev/mapper/3600a098038315048302b586c38397562, /dev/mapper/mpatha - - Excluded devices: 24 device(s) - - /dev/sdb: /dev/sdb has children block devices and could not be considered - - /dev/sdb4: /dev/sdb4 has an invalid filesystem signature (xfs) and cannot be used - - /dev/mapper/3600a098038315047433f586c53477272: has an invalid filesystem signature (xfs) - ... and 21 more excluded devices - - Node: ocpnode2.ocpiopex.growipx.com - ❌ Status: Degraded - - Reason: - failed to create/extend volume group vg1: failed to extend volume group vg1: - WARNING: VG name vg0 is used by VGs VVnkhP-khYQ-blyc-2TNo-d3cv-b6di-4RbSyY and EUV3xv-ft6q-39xK-J3ki-rglf-9H44-rVIHIq. - Fix duplicate VG names with vgrename uuid, a device filter, or system IDs. 
- Physical volume '/dev/mapper/3600a098038315048302b586c38397578p3' is already in volume group 'vg0' - Unable to add physical volume '/dev/mapper/3600a098038315048302b586c38397578p3' to volume group 'vg0' - ... (truncated, see LVMCluster status for full details) - - Devices: /dev/mapper/mpatha -``` - -This real example shows a common LVMS issue: duplicate volume group names preventing VG extension. - -**3. Storage (PVC/PV) Status** - -Lists pending or failed PVCs: - -Example output: -``` -Pending PVCs: - -database/postgres-data -❌ Status: Pending (10m) - Storage Class: lvms-vg1 - Requested: 100Gi - - Recent Events: - ⚠ ProvisioningFailed: no node has enough free space -``` - -**4. Operator Health** - -Checks LVMS operator pods, deployments, and daemonsets: - -Example issues: -``` -❌ vg-manager-abc123 (worker-0) - Status: CrashLoopBackOff - Restarts: 15 - Error: volume group "vg1" not found -``` - -**5. Pod Logs** - -Extracts and analyzes error/warning messages from pod logs: - -Example output (from real must-gather): -``` -═══════════════════════════════════════════════════════════ -POD LOGS ANALYSIS -═══════════════════════════════════════════════════════════ - -Pod: vg-manager-nz4pc -Unique errors/warnings: 1 - -❌ 2025-10-28T10:47:28Z: Reconciler error - Controller: lvmvolumegroup - Error Details: - failed to create/extend volume group vg1: failed to extend volume group vg1: - WARNING: VG name vg0 is used by VGs WsNJwk-DK3q-tSHg-zvQJ-imF1-SdRv-8oh4e0 ... - Cannot use /dev/dm-10: device is too small (pv_min_size) - Command requires all devices to be found. 
- -Pod: lvms-operator-65df9f4dbb-92jwl -Unique errors/warnings: 1 - -❌ 2025-10-28T10:52:48Z: failed to validate device class setup - Controller: lvmcluster - Error: VG vg1 on node Degraded is not in ready state (ocpnode1.ocpiopex.growipx.com) -``` - -**Key Points:** -- Logs are parsed from JSON format -- Errors are deduplicated (same error repeated in reconciliation loops) -- Shows unique error messages with first occurrence timestamp -- Provides additional context not visible in resource status - -### Step 4: Analyze Root Causes - -Connect related issues to identify root causes: - -**Common Pattern 1: Device Filesystem Conflict** -``` -Chain of failures: -1. Device /dev/sdb has existing ext4 filesystem -2. vg-manager cannot create volume group -3. Volume group missing on node -4. PVCs stuck in Pending - -Root cause: Device not properly wiped before LVMS use -``` - -**Common Pattern 2: Insufficient Capacity** -``` -Chain of failures: -1. Thin pool at 95% capacity -2. No free space for new volumes -3. PVCs stuck in Pending - -Root cause: Insufficient storage capacity or old volumes not cleaned up -``` - -**Common Pattern 3: Node-Specific Failures** -``` -Chain of failures: -1. Volume group missing on specific node -2. TopoLVM CSI driver not functional on that node -3. PVCs with node affinity to that node stuck Pending - -Root cause: Node-specific device configuration issue -``` - -### Step 5: Generate Remediation Plan - -Based on analysis results, provide prioritized recommendations: - -**CRITICAL Issues (Fix Immediately):** - -1. **Device Conflicts:** - ```bash - # Clean device on affected node - oc debug node/{node-name} - chroot /host wipefs -a /dev/{device} - - # Restart vg-manager to recreate VG - oc delete pod -n openshift-lvm-storage -l app.kubernetes.io/component=vg-manager - ``` - -2. **Pod Crashes:** - ```bash - # After fixing underlying issue, restart failed pods - oc delete pod -n openshift-lvm-storage {pod-name} - ``` - -3. 
**LVMCluster Not Ready:** - ```bash - # Review and fix device configuration - oc edit lvmcluster -n openshift-lvm-storage - - # Ensure devices match actual available devices - ``` - -**WARNING Issues (Address Soon):** - -1. **Capacity Issues:** - ```bash - # Check logical volume usage - oc debug node/{node} -- chroot /host lvs --units g - - # Remove unused volumes or expand thin pool - ``` - -2. **Partial Node Coverage:** - ```bash - # Investigate why daemonsets not on all nodes - oc get nodes --show-labels - oc describe daemonset -n openshift-lvm-storage - ``` - -### Step 6: Provide Next Steps - -Always provide clear next steps: - -1. **Review logs** (if available in must-gather): - - Operator logs: `namespaces/openshift-lvm-storage/pods/lvms-operator-*/logs/` - - VG-manager logs: `namespaces/openshift-lvm-storage/pods/vg-manager-*/logs/` - - TopoLVM logs: `namespaces/openshift-lvm-storage/pods/topolvm-*/logs/` - -2. **Verify fixes** (if cluster is accessible): - ```bash - # After implementing fixes, verify: - oc get lvmcluster -n openshift-lvm-storage - oc get lvmvolumegroup -A - oc get pvc -A | grep Pending - ``` - -3. 
**Re-collect must-gather** (if making changes): - ```bash - oc adm must-gather --image=quay.io/lvms_dev/lvms-must-gather:latest - ``` - -## Error Handling - -### Script Execution Errors - -**Script not found:** -```bash -# Verify script exists -ls plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py - -# Ensure it's executable -chmod +x plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py -``` - -**Python dependencies missing:** -```bash -# Install PyYAML -pip install pyyaml - -# Or use pip3 -pip3 install pyyaml -``` - -**Invalid YAML in must-gather:** -- Script handles YAML parsing errors gracefully -- Reports which files failed to parse -- Continues analysis with available data - -### Must-Gather Issues - -**Missing directories:** -- Script validates required directories exist -- Reports missing components -- Provides guidance on what's missing - -**Incomplete must-gather:** -- If critical resources missing, script reports what it can analyze -- Suggests re-collecting must-gather - -## Examples - -### Example 1: Full Analysis - -```bash -# Run comprehensive analysis -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - ./must-gather/registry-ci-openshift-org-origin-4-18.../ -``` - -Output: -``` -═══════════════════════════════════════════════════════════ -LVMCLUSTER STATUS -═══════════════════════════════════════════════════════════ - -LVMCluster: lvmcluster-sample -❌ State: Failed -❌ Ready: false -... - -═══════════════════════════════════════════════════════════ -LVMS ANALYSIS SUMMARY -═══════════════════════════════════════════════════════════ - -❌ CRITICAL ISSUES: 3 - - LVMCluster not Ready (state: Failed) - - Volume group vg1 not created on worker-0 - - 3 PVCs stuck in Pending state -``` - -### Example 2: Storage-Only Analysis - -```bash -# Focus on PVC issues -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - ./must-gather/... 
--component storage -``` - -Analyzes only: -- PVC/PV status -- Storage class configuration -- Volume provisioning issues - -### Example 3: Operator Health Check - -```bash -# Check operator components -python3 plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py \ - ./must-gather/... --component operator -``` - -Analyzes only: -- LVMCluster resource -- Deployments and daemonsets -- Pod status and crashes - -## Best Practices - -1. **Always validate path first:** - - Check for `namespaces/openshift-lvm-storage/` directory - - Use the correct subdirectory, not parent - -2. **Run full analysis first:** - - Get overall health picture - - Then drill down with component-specific analysis if needed - -3. **Correlate issues:** - - Look for patterns across components - - Connect pod failures to VG issues to PVC problems - -4. **Check timestamps:** - - Events and pod restarts have timestamps - - Helps understand sequence of failures - -5. **Provide actionable output:** - - Don't just list issues - - Explain root causes - - Give specific remediation steps - - Include verification commands - -6. 
**Reference documentation:** - - Link to LVMS troubleshooting guide - - Point to relevant sections in must-gather logs - -## Additional Resources - -- [LVMS Troubleshooting Guide](https://github.com/openshift/lvm-operator/blob/main/docs/troubleshooting.md) -- [LVMS Architecture](https://github.com/openshift/lvm-operator/tree/main/docs) -- [TopoLVM Documentation](https://github.com/topolvm/topolvm) -- [Must-Gather Collection](https://github.com/openshift/lvm-operator/tree/main/must-gather) diff --git a/plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py b/plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py deleted file mode 100644 index 4a6d1a012..000000000 --- a/plugins/lvms/skills/lvms-analyzer/scripts/analyze_lvms.py +++ /dev/null @@ -1,895 +0,0 @@ -#!/usr/bin/env python3 -""" -LVMS Must-Gather Analyzer - -Analyzes LVMS (Logical Volume Manager Storage) must-gather data to identify -and diagnose storage issues including LVMCluster health, volume groups, -PVC/PV problems, operator issues, and TopoLVM CSI driver status. 
- -Usage: - python3 analyze_lvms.py [--component ] - -Arguments: - must-gather-path: Path to the extracted must-gather directory - --component: Optional filter for specific component analysis - (storage, operator, volumes, all) -""" - -import os -import sys -import json -import yaml -import argparse -from pathlib import Path -from typing import Dict, List, Any, Optional -from collections import defaultdict - - -class Colors: - """ANSI color codes for terminal output""" - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BLUE = '\033[94m' - BOLD = '\033[1m' - END = '\033[0m' - - -def print_section(title: str): - """Print a formatted section header""" - separator = "=" * 79 - print(f"\n{separator}") - print(f"{Colors.BOLD}{title}{Colors.END}") - print(f"{separator}\n") - - -def print_success(message: str): - """Print success message with checkmark""" - print(f"{Colors.GREEN}✓{Colors.END} {message}") - - -def print_warning(message: str): - """Print warning message""" - print(f"{Colors.YELLOW}⚠{Colors.END} {message}") - - -def print_error(message: str): - """Print error message""" - print(f"{Colors.RED}❌{Colors.END} {message}") - - -def print_info(message: str): - """Print info message""" - print(f"{Colors.BLUE}ℹ{Colors.END} {message}") - - -def load_yaml_file(file_path: Path) -> Optional[Any]: - """Load and parse a YAML file""" - try: - with open(file_path, 'r') as f: - # Handle multiple YAML documents in one file - docs = list(yaml.safe_load_all(f)) - return docs[0] if len(docs) == 1 else docs - except Exception as e: - print_error(f"Failed to load {file_path}: {e}") - return None - - -def find_files(base_path: Path, pattern: str) -> List[Path]: - """Recursively find files matching a pattern""" - return list(base_path.rglob(pattern)) - - -class LVMSAnalyzer: - """Main analyzer class for LVMS must-gather data""" - - def __init__(self, must_gather_path: str): - self.base_path = Path(must_gather_path) - # LVMS namespace changed from openshift-storage to 
openshift-lvm-storage - # Support both for backward compatibility with older must-gathers - self.lvms_namespace = None - self.possible_namespaces = ["openshift-lvm-storage", "openshift-storage"] - - # Data storage - self.lvmclusters = [] - self.lvmvolumegroups = [] - self.lvmvolumegroupnodestatus = [] - self.pods = [] - self.events = [] - self.pvcs = [] - self.pvs = [] - self.storage_classes = [] - self.deployments = [] - self.daemonsets = [] - - # Issue tracking - self.issues = { - 'critical': [], - 'warning': [], - 'info': [] - } - - # Pod logs - self.pod_logs = [] - - def validate_must_gather(self) -> bool: - """Validate that the must-gather path is correct and detect LVMS namespace""" - # Try to detect which namespace is used in this must-gather - for namespace in self.possible_namespaces: - lvms_ns_path = self.base_path / "namespaces" / namespace - if lvms_ns_path.exists(): - self.lvms_namespace = namespace - if namespace == "openshift-storage": - print_info(f"Detected older LVMS installation using namespace: {namespace}") - print_info("(Newer LVMS versions use openshift-lvm-storage namespace)") - else: - print_info(f"Detected LVMS namespace: {namespace}") - return True - - # If neither namespace found, try to help find the correct path - print_error(f"LVMS namespace directory not found in: {self.base_path}") - print_info(f"Looking for must-gather structure at: {self.base_path}") - - # Try to find the correct subdirectory - for namespace in self.possible_namespaces: - possible_paths = list(self.base_path.glob(f"*/namespaces/{namespace}")) - if possible_paths: - print_info(f"Found LVMS namespace '{namespace}' at: {possible_paths[0].parent.parent}") - print_info("Please use the correct subdirectory path") - return False - - print_error("Could not find openshift-lvm-storage or openshift-storage namespace") - print_info("This may not be an LVMS must-gather") - return False - - def load_resources(self): - """Load all LVMS-related resources from must-gather""" - 
print_info("Loading LVMS resources from must-gather...") - - # LVMS must-gathers store resources in oc_output directory - oc_output_dir = self.base_path / "namespaces" / self.lvms_namespace / "oc_output" - - # Load LVMCluster resources from oc_output/lvmcluster.yaml - lvmcluster_file = oc_output_dir / "lvmcluster.yaml" - if lvmcluster_file.exists(): - data = load_yaml_file(lvmcluster_file) - if data: - if isinstance(data, list): - self.lvmclusters.extend([item for item in data if item]) - elif isinstance(data, dict) and data.get('items'): - self.lvmclusters.extend(data['items']) - elif isinstance(data, dict) and data.get('kind') == 'LVMCluster': - self.lvmclusters.append(data) - else: - # Fallback: try finding in API group directories (newer structure) - lvmcluster_files = find_files( - self.base_path / "namespaces" / self.lvms_namespace, - "lvmclusters.yaml" - ) - for file in lvmcluster_files: - data = load_yaml_file(file) - if data: - if isinstance(data, list): - self.lvmclusters.extend([item for item in data if item]) - elif isinstance(data, dict) and data.get('items'): - self.lvmclusters.extend(data['items']) - elif isinstance(data, dict): - self.lvmclusters.append(data) - - # Note: LVMVolumeGroup and LVMVolumeGroupNodeStatus are often in text format in oc_output - # We'll extract info from LVMCluster status which contains node-level VG status - # For detailed VG info, check oc_output/lvmvolumegroup and lvmvolumegroupnodestatus text files - - # Load pods from pods directory - pods_dir = self.base_path / "namespaces" / self.lvms_namespace / "pods" - if pods_dir.exists(): - for pod_dir in pods_dir.iterdir(): - if pod_dir.is_dir(): - # Look for {pod-name}.yaml in the pod directory - pod_yaml = pod_dir / f"{pod_dir.name}.yaml" - if pod_yaml.exists(): - data = load_yaml_file(pod_yaml) - if data and isinstance(data, dict) and data.get('kind') == 'Pod': - self.pods.append(data) - - # Load events from oc_output or core directory - events_file = oc_output_dir / "events" 
- if not events_file.exists(): - # Try alternate location - events_files = find_files( - self.base_path / "namespaces" / self.lvms_namespace / "core", - "events.yaml" - ) - for file in events_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - self.events.extend(data['items']) - elif isinstance(data, list): - self.events.extend([item for item in data if item]) - - # Load PVCs (all namespaces, filter for LVMS storage classes) - pvc_files = find_files(self.base_path / "namespaces", "persistentvolumeclaims.yaml") - for file in pvc_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - self.pvcs.extend(data['items']) - elif isinstance(data, list): - self.pvcs.extend([item for item in data if item]) - - # Filter PVCs for LVMS storage classes - self.pvcs = [ - pvc for pvc in self.pvcs - if pvc.get('spec', {}).get('storageClassName', '').startswith('lvms-') - ] - - # Load PVs - pv_files = find_files( - self.base_path / "cluster-scoped-resources" / "core", - "persistentvolumes.yaml" - ) - for file in pv_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - pvs = data['items'] - elif isinstance(data, list): - pvs = data - else: - pvs = [data] if data else [] - - # Filter for TopoLVM provisioned volumes - self.pvs.extend([ - pv for pv in pvs - if pv.get('spec', {}).get('csi', {}).get('driver') == 'topolvm.io' - ]) - - # Load storage classes - sc_files = find_files( - self.base_path / "cluster-scoped-resources" / "storage.k8s.io", - "storageclasses.yaml" - ) - for file in sc_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - scs = data['items'] - elif isinstance(data, list): - scs = data - else: - scs = [data] if data else [] - - # Filter for TopoLVM storage classes - self.storage_classes.extend([ - sc for sc in scs - if sc.get('provisioner') == 'topolvm.io' - ]) - - # Load deployments 
and daemonsets - # Try apps directory first, then look in oc_output - apps_dir = self.base_path / "namespaces" / self.lvms_namespace / "apps" - - deploy_files = find_files(apps_dir, "deployments.yaml") if apps_dir.exists() else [] - for file in deploy_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - self.deployments.extend(data['items']) - elif isinstance(data, list): - self.deployments.extend([item for item in data if item]) - elif isinstance(data, dict) and data.get('kind') == 'Deployment': - self.deployments.append(data) - - ds_files = find_files(apps_dir, "daemonsets.yaml") if apps_dir.exists() else [] - for file in ds_files: - data = load_yaml_file(file) - if data: - if isinstance(data, dict) and data.get('items'): - self.daemonsets.extend(data['items']) - elif isinstance(data, list): - self.daemonsets.extend([item for item in data if item]) - elif isinstance(data, dict) and data.get('kind') == 'DaemonSet': - self.daemonsets.append(data) - - print_success(f"Loaded {len(self.lvmclusters)} LVMCluster(s)") - print_success(f"Loaded {len(self.pods)} pod(s)") - print_success(f"Loaded {len(self.pvcs)} LVMS PVC(s)") - print_success(f"Loaded {len(self.pvs)} LVMS PV(s)") - print_success(f"Loaded {len(self.deployments)} deployment(s)") - print_success(f"Loaded {len(self.daemonsets)} daemonset(s)") - - def load_pod_logs(self): - """Load and parse pod logs from must-gather""" - print_info("Loading pod logs...") - - pods_dir = self.base_path / "namespaces" / self.lvms_namespace / "pods" - if not pods_dir.exists(): - return - - log_entries = [] - - for pod_dir in pods_dir.iterdir(): - if not pod_dir.is_dir(): - continue - - pod_name = pod_dir.name - - # Find log files in pod directory - # Structure: pods/{pod-name}/{container}/{container}/logs/current.log - for container_dir in pod_dir.iterdir(): - if not container_dir.is_dir(): - continue - - # Navigate to nested container directory - nested_container_dir = container_dir / 
container_dir.name / "logs" - if nested_container_dir.exists(): - log_file = nested_container_dir / "current.log" - if log_file.exists(): - container_name = container_dir.name - entries = self._parse_log_file(log_file, pod_name, container_name) - log_entries.extend(entries) - - # Deduplicate log entries by error message - unique_errors = {} - for entry in log_entries: - error_key = entry['msg'] - if error_key not in unique_errors: - unique_errors[error_key] = entry - else: - # Keep the earliest occurrence - if entry['ts'] < unique_errors[error_key]['ts']: - unique_errors[error_key] = entry - - self.pod_logs = list(unique_errors.values()) - print_success(f"Loaded {len(self.pod_logs)} unique error/warning messages from pod logs") - - def _parse_log_file(self, log_file: Path, pod_name: str, container_name: str) -> List[Dict[str, Any]]: - """Parse a JSON-formatted log file and extract error/warning entries""" - entries = [] - - try: - with open(log_file, 'r') as f: - for line_num, line in enumerate(f, 1): - line = line.strip() - if not line: - continue - - try: - # Log format: "TIMESTAMP JSON" - # Split on first space to separate timestamp from JSON - parts = line.split(' ', 1) - if len(parts) < 2: - continue - - json_part = parts[1] - log_entry = json.loads(json_part) - - # Extract error and warning level logs - level = log_entry.get('level', '').lower() - if level in ['error', 'warning']: - entries.append({ - 'pod': pod_name, - 'container': container_name, - 'level': level, - 'ts': log_entry.get('ts', ''), - 'msg': log_entry.get('msg', ''), - 'error': log_entry.get('error', ''), - 'controller': log_entry.get('controller', ''), - 'raw': log_entry - }) - except (json.JSONDecodeError, IndexError): - # Skip non-JSON lines or malformed lines - continue - except Exception as e: - print_error(f"Failed to parse log file {log_file}: {e}") - - return entries - - def analyze_lvmcluster(self): - """Analyze LVMCluster resource health""" - print_section("LVMCLUSTER STATUS") - - if 
not self.lvmclusters: - print_warning("No LVMCluster resources found") - self.issues['critical'].append("No LVMCluster configured") - return - - for cluster in self.lvmclusters: - name = cluster.get('metadata', {}).get('name', 'unknown') - status = cluster.get('status', {}) - - print(f"\n{Colors.BOLD}LVMCluster:{Colors.END} {name}") - - # Check state - state = status.get('state', 'Unknown') - ready = status.get('ready', False) - - if state == 'Ready' and ready: - print_success(f"State: {state}") - print_success(f"Ready: {ready}") - elif state == 'Progressing': - print_warning(f"State: {state}") - print_warning(f"Ready: {ready}") - self.issues['warning'].append(f"LVMCluster {name} in Progressing state") - else: - print_error(f"State: {state}") - print_error(f"Ready: {ready}") - self.issues['critical'].append(f"LVMCluster {name} not Ready (state: {state})") - - # Check conditions - conditions = status.get('conditions', []) - if conditions: - print(f"\n{Colors.BOLD}Conditions:{Colors.END}") - for cond in conditions: - cond_type = cond.get('type', 'Unknown') - cond_status = cond.get('status', 'Unknown') - reason = cond.get('reason', '') - message = cond.get('message', '') - - if cond_status == 'True': - print_success(f"{cond_type}: {cond_status}") - if reason: - print(f" Reason: {reason}") - else: - print_error(f"{cond_type}: {cond_status}") - if reason: - print(f" Reason: {reason}") - if message: - print(f" Message: {message}") - - self.issues['critical'].append( - f"LVMCluster {name} condition {cond_type}: {message or reason}" - ) - - # Check device class statuses - device_class_statuses = status.get('deviceClassStatuses', []) - if device_class_statuses: - print(f"\n{Colors.BOLD}Device Classes:{Colors.END}") - for dc_status in device_class_statuses: - dc_name = dc_status.get('name', 'unknown') - node_status = dc_status.get('nodeStatus', []) - - total_nodes = len(node_status) - ready_nodes = sum(1 for ns in node_status if ns.get('status') == 'Ready') - - print(f"\n 
Device Class: {dc_name}") - if ready_nodes == total_nodes and total_nodes > 0: - print_success(f" Nodes: {ready_nodes}/{total_nodes} Ready") - elif ready_nodes > 0: - print_warning(f" Nodes: {ready_nodes}/{total_nodes} Ready") - self.issues['warning'].append( - f"Device class {dc_name}: only {ready_nodes}/{total_nodes} nodes ready" - ) - else: - print_error(f" Nodes: {ready_nodes}/{total_nodes} Ready") - self.issues['critical'].append( - f"Device class {dc_name}: no nodes ready" - ) - - # Show failed nodes - failed_nodes = [ns.get('node') for ns in node_status if ns.get('status') != 'Ready'] - if failed_nodes: - print(f" Failed nodes: {', '.join(failed_nodes)}") - - def analyze_volume_groups(self): - """Analyze volume group status from LVMCluster deviceClassStatuses""" - print_section("VOLUME GROUP STATUS") - - if not self.lvmclusters: - print_warning("No LVMCluster resources found") - return - - # Extract VG info from LVMCluster status - for cluster in self.lvmclusters: - status = cluster.get('status', {}) - device_class_statuses = status.get('deviceClassStatuses', []) - - if not device_class_statuses: - print_warning("No device class status information found in LVMCluster") - return - - for dc_status in device_class_statuses: - vg_name = dc_status.get('name', 'unknown') - node_statuses = dc_status.get('nodeStatus', []) - - print(f"\n{Colors.BOLD}Volume Group/Device Class:{Colors.END} {vg_name}") - print(f"Nodes: {len(node_statuses)}") - - for node_status in node_statuses: - node_name = node_status.get('node', 'unknown') - status_state = node_status.get('status', 'Unknown') - devices = node_status.get('devices', []) - reason = node_status.get('reason', '') - - print(f"\n {Colors.BOLD}Node:{Colors.END} {node_name}") - - # Check status - if status_state == 'Ready': - print_success(f" Status: {status_state}") - elif status_state == 'Progressing': - print_warning(f" Status: {status_state}") - self.issues['warning'].append(f"VG {vg_name} on {node_name}: Progressing") - 
else: - print_error(f" Status: {status_state}") - self.issues['critical'].append(f"VG {vg_name} on {node_name}: {status_state}") - - # Show reason if failed/degraded - if reason: - print(f"\n {Colors.BOLD}Reason:{Colors.END}") - # Print first few lines of reason - for line in reason.split('\n')[:5]: - print(f" {line}") - if len(reason.split('\n')) > 5: - print(f" ... (truncated, see LVMCluster status for full details)") - self.issues['critical'].append(f"VG {vg_name} on {node_name}: {reason[:200]}") - - # Check devices - if devices: - valid_devices = [d for d in devices if d != '[unknown]'] - if valid_devices: - print(f"\n {Colors.BOLD}Devices:{Colors.END} {', '.join(valid_devices)}") - else: - print_warning(f" Devices: No valid devices (unknown)") - else: - print_warning(f" No devices configured") - - # Show excluded devices summary - excluded = node_status.get('excluded', []) - if excluded: - print(f"\n {Colors.BOLD}Excluded devices:{Colors.END} {len(excluded)} device(s)") - # Show first few exclusion reasons - for i, excl in enumerate(excluded[:3]): - name = excl.get('name', 'unknown') - reasons = excl.get('reasons', []) - if reasons: - print(f" - {name}: {reasons[0]}") - if len(excluded) > 3: - print(f" ... 
and {len(excluded) - 3} more excluded devices") - - def analyze_pvcs(self): - """Analyze PVC status for LVMS volumes""" - print_section("STORAGE (PVC/PV) STATUS") - - if not self.pvcs: - print_info("No PVCs using LVMS storage classes found") - return - - # Count by status - status_counts = defaultdict(int) - pending_pvcs = [] - - for pvc in self.pvcs: - phase = pvc.get('status', {}).get('phase', 'Unknown') - status_counts[phase] += 1 - - if phase != 'Bound': - pending_pvcs.append(pvc) - - print(f"Total LVMS PVCs: {len(self.pvcs)}") - for phase, count in sorted(status_counts.items()): - if phase == 'Bound': - print_success(f"{phase}: {count}") - else: - print_error(f"{phase}: {count}") - - # Analyze pending PVCs - if pending_pvcs: - print(f"\n{Colors.BOLD}Pending/Failed PVCs:{Colors.END}\n") - - for pvc in pending_pvcs: - name = pvc.get('metadata', {}).get('name', 'unknown') - namespace = pvc.get('metadata', {}).get('namespace', 'unknown') - phase = pvc.get('status', {}).get('phase', 'Unknown') - storage_class = pvc.get('spec', {}).get('storageClassName', 'unknown') - requested_size = pvc.get('spec', {}).get('resources', {}).get('requests', {}).get('storage', 'unknown') - - print(f"{Colors.BOLD}{namespace}/{name}{Colors.END}") - print_error(f" Status: {phase}") - print(f" Storage Class: {storage_class}") - print(f" Requested: {requested_size}") - - # Check for related events - related_events = [ - e for e in self.events - if e.get('involvedObject', {}).get('name') == name - and e.get('involvedObject', {}).get('namespace') == namespace - ] - - if related_events: - print(f"\n {Colors.BOLD}Recent Events:{Colors.END}") - for event in related_events[-3:]: # Last 3 events - event_type = event.get('type', 'Normal') - reason = event.get('reason', '') - message = event.get('message', '') - - if event_type == 'Warning': - print_warning(f" {reason}: {message}") - else: - print_info(f" {reason}: {message}") - - self.issues['critical'].append(f"PVC {namespace}/{name} in {phase} 
state") - print() - - def analyze_operator_health(self): - """Analyze LVMS operator and component pod health""" - print_section("OPERATOR HEALTH") - - # Analyze deployments - if self.deployments: - print(f"{Colors.BOLD}Deployments:{Colors.END}\n") - for deploy in self.deployments: - name = deploy.get('metadata', {}).get('name', 'unknown') - spec = deploy.get('spec', {}) - status = deploy.get('status', {}) - - desired = spec.get('replicas', 0) - ready = status.get('readyReplicas', 0) - - if ready == desired and desired > 0: - print_success(f"{name}: {ready}/{desired} replicas ready") - else: - print_error(f"{name}: {ready}/{desired} replicas ready") - self.issues['critical'].append(f"Deployment {name}: only {ready}/{desired} replicas ready") - - # Analyze daemonsets - if self.daemonsets: - print(f"\n{Colors.BOLD}DaemonSets:{Colors.END}\n") - for ds in self.daemonsets: - name = ds.get('metadata', {}).get('name', 'unknown') - status = ds.get('status', {}) - - desired = status.get('desiredNumberScheduled', 0) - ready = status.get('numberReady', 0) - - if ready == desired and desired > 0: - print_success(f"{name}: {ready}/{desired} nodes ready") - else: - print_warning(f"{name}: {ready}/{desired} nodes ready") - self.issues['warning'].append(f"DaemonSet {name}: only {ready}/{desired} nodes ready") - - # Analyze pod status - if self.pods: - problematic_pods = [ - pod for pod in self.pods - if pod.get('status', {}).get('phase') not in ['Running', 'Succeeded'] - ] - - if problematic_pods: - print(f"\n{Colors.BOLD}Problematic Pods:{Colors.END}\n") - - for pod in problematic_pods: - name = pod.get('metadata', {}).get('name', 'unknown') - phase = pod.get('status', {}).get('phase', 'Unknown') - - print_error(f"{name}: {phase}") - - # Check container statuses - container_statuses = pod.get('status', {}).get('containerStatuses', []) - for cs in container_statuses: - container_name = cs.get('name', 'unknown') - restart_count = cs.get('restartCount', 0) - - if restart_count > 0: - 
print(f" {container_name}: {restart_count} restarts") - - # Check waiting/terminated states - if cs.get('state', {}).get('waiting'): - reason = cs['state']['waiting'].get('reason', '') - message = cs['state']['waiting'].get('message', '') - print_warning(f" Waiting: {reason}") - if message: - print(f" Message: {message}") - - if cs.get('state', {}).get('terminated'): - reason = cs['state']['terminated'].get('reason', '') - message = cs['state']['terminated'].get('message', '') - exit_code = cs['state']['terminated'].get('exitCode', 0) - print_error(f" Terminated: {reason} (exit code: {exit_code})") - if message: - print(f" Message: {message}") - - self.issues['critical'].append(f"Pod {name} in {phase} state") - print() - - def analyze_storage_classes(self): - """Analyze TopoLVM storage class configuration""" - print_section("TOPOLVM CSI DRIVER") - - if not self.storage_classes: - print_warning("No TopoLVM storage classes found") - self.issues['warning'].append("No TopoLVM storage classes configured") - return - - print(f"{Colors.BOLD}Storage Classes:{Colors.END}\n") - - for sc in self.storage_classes: - name = sc.get('metadata', {}).get('name', 'unknown') - provisioner = sc.get('provisioner', 'unknown') - binding_mode = sc.get('volumeBindingMode', 'Immediate') - parameters = sc.get('parameters', {}) - - print_success(f"{name}") - print(f" Provisioner: {provisioner}") - print(f" Binding Mode: {binding_mode}") - - if parameters: - print(f" Parameters:") - for key, value in parameters.items(): - print(f" {key}: {value}") - print() - - def analyze_pod_logs(self): - """Analyze pod logs for errors and warnings""" - print_section("POD LOGS ANALYSIS") - - if not self.pod_logs: - print_info("No error or warning messages found in pod logs") - return - - # Group logs by pod - logs_by_pod = defaultdict(list) - for log_entry in self.pod_logs: - logs_by_pod[log_entry['pod']].append(log_entry) - - for pod_name, entries in sorted(logs_by_pod.items()): - 
print(f"\n{Colors.BOLD}Pod:{Colors.END} {pod_name}") - print(f"Unique errors/warnings: {len(entries)}\n") - - for entry in sorted(entries, key=lambda x: x['ts']): - level = entry['level'] - timestamp = entry['ts'] - msg = entry['msg'] - error = entry['error'] - controller = entry['controller'] - - if level == 'error': - print_error(f"{timestamp}: {msg}") - else: - print_warning(f"{timestamp}: {msg}") - - if controller: - print(f" Controller: {controller}") - - if error: - # Split multi-line errors for better readability - error_lines = error.split('\n') - if len(error_lines) > 1: - print(f" {Colors.BOLD}Error Details:{Colors.END}") - for i, line in enumerate(error_lines[:10]): # Show first 10 lines - if line.strip(): - print(f" {line}") - if len(error_lines) > 10: - print(f" ... ({len(error_lines) - 10} more lines)") - else: - print(f" Error: {error}") - - # Track critical issues from logs - if level == 'error': - self.issues['critical'].append(f"Pod {pod_name}: {msg}") - else: - self.issues['warning'].append(f"Pod {pod_name}: {msg}") - - print() - - def generate_summary(self): - """Generate final summary and recommendations""" - print_section("LVMS ANALYSIS SUMMARY") - - critical_count = len(self.issues['critical']) - warning_count = len(self.issues['warning']) - info_count = len(self.issues['info']) - - if critical_count == 0 and warning_count == 0: - print_success(f"No critical issues or warnings found") - print_info("LVMS appears to be healthy") - else: - if critical_count > 0: - print_error(f"CRITICAL ISSUES: {critical_count}") - for issue in self.issues['critical']: - print(f" - {issue}") - print() - - if warning_count > 0: - print_warning(f"WARNINGS: {warning_count}") - for issue in self.issues['warning']: - print(f" - {issue}") - print() - - # Recommendations - if critical_count > 0 or warning_count > 0: - print_section("RECOMMENDATIONS") - - if critical_count > 0: - print(f"{Colors.BOLD}CRITICAL (Fix Immediately):{Colors.END}\n") - - # Check for common 
patterns - if any('PVC' in issue and 'Pending' in issue for issue in self.issues['critical']): - print("1. Investigate pending PVCs:") - print(" - Check volume group status on nodes") - print(" - Verify sufficient free space in volume groups") - print(" - Check vg-manager pods are running") - print(" - Review events for provisioning errors") - print() - - if any('not Ready' in issue or 'not ready' in issue for issue in self.issues['critical']): - print("2. Fix LVMCluster/VG readiness:") - print(" - Check node device availability") - print(" - Verify devices are not in use by other systems") - print(" - Review vg-manager pod logs") - print(" - Ensure devices match deviceSelector criteria") - print() - - if any('Pod' in issue for issue in self.issues['critical']): - print("3. Fix failing pods:") - print(" - Review pod logs in must-gather") - print(" - Check for image pull errors") - print(" - Verify node resources available") - print() - - if warning_count > 0: - print(f"\n{Colors.BOLD}WARNINGS (Address Soon):{Colors.END}\n") - - if any('DaemonSet' in issue for issue in self.issues['warning']): - print("- Investigate DaemonSet node coverage") - print(" Check node taints and tolerations") - print() - - def run_analysis(self, component: str = 'all'): - """Run the complete analysis""" - if not self.validate_must_gather(): - return 1 - - self.load_resources() - self.load_pod_logs() - - if component in ['all', 'operator']: - self.analyze_lvmcluster() - - if component in ['all', 'volumes', 'vg']: - self.analyze_volume_groups() - - if component in ['all', 'storage', 'pvc']: - self.analyze_pvcs() - - if component in ['all', 'operator', 'pods']: - self.analyze_operator_health() - - if component in ['all', 'storage']: - self.analyze_storage_classes() - - if component in ['all', 'operator', 'pods', 'logs']: - self.analyze_pod_logs() - - self.generate_summary() - - # Return exit code based on issues - return 1 if self.issues['critical'] else 0 - - -def main(): - parser = 
argparse.ArgumentParser( - description='Analyze LVMS must-gather data', - formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - 'must_gather_path', - help='Path to the LVMS must-gather directory' - ) - parser.add_argument( - '--component', - choices=['all', 'storage', 'operator', 'volumes', 'vg', 'pvc', 'pods', 'logs'], - default='all', - help='Component to analyze (default: all)' - ) - - args = parser.parse_args() - - analyzer = LVMSAnalyzer(args.must_gather_path) - exit_code = analyzer.run_analysis(args.component) - - sys.exit(exit_code) - - -if __name__ == '__main__': - main() diff --git a/plugins/native-notifications/.claude-plugin/plugin.json b/plugins/native-notifications/.claude-plugin/plugin.json deleted file mode 100644 index 8c2ad1a07..000000000 --- a/plugins/native-notifications/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "native-notifications", - "description": "Cross-platform desktop notifications (macOS, Linux desktop, Linux headless)", - "version": "0.0.2", - "author": { - "name": "openshift" - } -} diff --git a/plugins/native-notifications/README.md b/plugins/native-notifications/README.md deleted file mode 100644 index 5ca7ee895..000000000 --- a/plugins/native-notifications/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# Cross-platform Notifications Plugin - -This plugin enables Claude to notify the user via native system notifications when it is done with a prompt or requires user input. - -## How It Works - -The plugin uses Claude Code's [hook system](https://docs.claude.com/en/docs/claude-code/hooks) to send a notification when it is done. 
-It automatically detects the platform and uses the best available notification method: - -| Platform | Method | -|---|---| -| macOS | `osascript` (native macOS notifications) | -| Linux with desktop session (`$DISPLAY` / `$WAYLAND_DISPLAY`) | `notify-send` (libnotify) | -| Linux headless / SSH (no display) | terminal bell (`\a`) | - -### Hook Configuration - -The plugin is defined in `plugins/macos-notifications/hooks/hooks.json`. -To customize the contents displayed by the notification, edit the script call: - -```json -{ - "description": "Cross-platform desktop notifications (macOS, Linux desktop, Linux headless)", - "hooks": { - "Notification": [ - { - "hooks": [ - { - "type": "command", - "command": "if command -v osascript &>/dev/null; then osascript -e 'display notification \"Claude needs your input\" with title \"🔔 Claude Code\"'; elif command -v notify-send &>/dev/null && [ -n \"${DISPLAY:-}${WAYLAND_DISPLAY:-}\" ]; then notify-send '🔔 Claude Code' 'Claude needs your input'; else printf '\\a'; fi" - } - ] - } - ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "if command -v osascript &>/dev/null; then osascript -e 'display notification \"Claude finished your task\" with title \"✅ Claude Code\"'; elif command -v notify-send &>/dev/null && [ -n \"${DISPLAY:-}${WAYLAND_DISPLAY:-}\" ]; then notify-send '✅ Claude Code' 'Claude finished your task'; else printf '\\a'; fi" - } - ] - } - ] - } -} -``` - diff --git a/plugins/native-notifications/hooks/hooks.json b/plugins/native-notifications/hooks/hooks.json deleted file mode 100644 index 1ed96543d..000000000 --- a/plugins/native-notifications/hooks/hooks.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "description": "Cross-platform desktop notifications (macOS, Linux desktop, Linux headless)", - "hooks": { - "Notification": [ - { - "hooks": [ - { - "type": "command", - "command": "if command -v osascript >/dev/null 2>&1; then osascript -e 'display notification \"Claude needs your input\" with title 
\"🔔 Claude Code\"'; elif command -v notify-send >/dev/null 2>&1 && [ -n \"${DISPLAY:-}${WAYLAND_DISPLAY:-}\" ]; then notify-send '🔔 Claude Code' 'Claude needs your input'; else printf '\\a'; fi" - } - ] - } - ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "if command -v osascript >/dev/null 2>&1; then osascript -e 'display notification \"Claude finished your task\" with title \"✅ Claude Code\"'; elif command -v notify-send >/dev/null 2>&1 && [ -n \"${DISPLAY:-}${WAYLAND_DISPLAY:-}\" ]; then notify-send '✅ Claude Code' 'Claude finished your task'; else printf '\\a'; fi" - } - ] - } - ] - } -} diff --git a/plugins/origin/.claude-plugin/plugin.json b/plugins/origin/.claude-plugin/plugin.json deleted file mode 100644 index 285629b2b..000000000 --- a/plugins/origin/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "origin", - "description": "Helpers for openshift/origin development.", - "version": "0.0.2", - "author": { - "name": "openshift" - } -} diff --git a/plugins/origin/README.md b/plugins/origin/README.md deleted file mode 100644 index a60100d35..000000000 --- a/plugins/origin/README.md +++ /dev/null @@ -1,124 +0,0 @@ -# Origin Plugin - -Utilities and workflow helpers for developing and reviewing changes in the -openshift/origin repository. -This plugin focuses on improving test quality, code consistency, and CI suite -integration for Origin contributions. - -## Commands - -### /origin:two-node-origin-pr-helper - -Expert review tool for PRs that add or modify Two Node (Fencing or Arbiter) tests -under test/extended/two_node/. 
- -This command performs: - -- Automatic discovery of changed Two Node test files -- Analysis of Ginkgo Describe/It blocks, suite tags, and Serial annotations -- Review of test logic, determinism, cleanup behavior, and structure -- Suggestions for reusing existing Origin and Kubernetes utilities -- Identification of duplicated logic that should use shared helpers -- Recommendations for suite placement and Serial usage -- Recommendations for CI lane coverage in openshift/release -- Generation of ready-to-paste PR text for both Origin and Release repositories - -Use this helper when contributing to Origin’s Two Node test suite or reviewing PRs -that affect Two Node behavior. - -See the commands/ directory for full documentation. - -## Installation - -### From the Claude Code Plugin Marketplace - -1. Add the OpenShift ai-helpers marketplace: - - /plugin marketplace add openshift-eng/ai-helpers - -2. Install the origin plugin: - - /plugin install origin@ai-helpers - -3. Use the command: - - /origin:two-node-origin-pr-helper - -## Available Commands - -### Two Node PR Review - -#### /origin:two-node-origin-pr-helper — Review Two Node Tests in Origin - -This command performs a full expert review of PRs that modify or add Two Node -(Fencing or Arbiter) tests under test/extended/two_node/. - -The helper covers: - -- Code correctness and logical consistency -- Ginkgo test structure and best practices -- Suite tagging and Serial analysis -- Utility/helper reuse (Origin + Kubernetes) -- CI suite and lane coverage recommendations -- PR description generation - -See commands/two-node-origin-pr-helper.md for full documentation. - -## Development - -### Adding New Commands - -To add a new command to this plugin: - -1. Create a markdown file in commands/: - - touch plugins/origin/commands/your-command.md - -2. 
Use existing commands as a template and include sections: - - - Name - - Synopsis - - Description - - Implementation behavior - - Return value / output structure - - Examples - - Arguments - - Error handling - - Additional context if needed - -3. Test the command: - - /origin:your-command - -## Plugin Structure - -plugins/origin/ -├── .claude-plugin/ -│ └── plugin.json -├── commands/ -│ └── two-node-origin-pr-helper.md -└── README.md - -## Related Plugins - -- openshift — General OpenShift development and CI helpers -- ci — Prow/CI-related workflow helpers -- git — Git workflow helpers -- jira — Jira automation helpers -- utils — General-purpose utilities - -## Contributing - -Contributions are welcome. - -When adding Origin-specific commands: - -- Ensure the workflow relates directly to openshift/origin -- Follow existing documentation patterns -- Provide actionable examples and behavior explanations -- Use realistic Origin repository paths and test patterns -- Update this README with any new commands - -## License - -See [LICENSE](../../LICENSE) for details. diff --git a/plugins/origin/commands/two-node-origin-pr-helper.md b/plugins/origin/commands/two-node-origin-pr-helper.md deleted file mode 100644 index 4bf35e8fb..000000000 --- a/plugins/origin/commands/two-node-origin-pr-helper.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -description: Expert review tool for PRs that add or modify Two Node (Fencing or Arbiter) tests under test/extended/two_node/ in openshift/origin. -argument-hint: "[--url PR_URL] [] [--depth quick|full]" ---- - -## Name - -/origin:two-node-origin-pr-helper — Review Two Node (Fencing/Arbiter) tests in openshift/origin. - -## Synopsis -``` -/origin:two-node-origin-pr-helper [--url PR_URL] [] [--depth quick|full] -``` -## Description - -The /origin:two-node-origin-pr-helper command is an expert review tool for PRs that add or modify -Two Node (Fencing or Arbiter) tests under test/extended/two_node/ in openshift/origin. 
- -It: - -- Discovers changed Two Node test files from the current branch. -- Analyzes Ginkgo Describe / Context / It blocks, suite tags, and [Serial] markers. -- Reviews test logic, structure, cleanup, and determinism. -- Suggests reuse of existing Origin and Kubernetes helpers instead of ad-hoc code. -- Recommends suite + [Serial] tagging and CI coverage. -- Generates ready-to-paste PR description text for the Origin PR. -- Suggests CI lane characteristics for openshift/release (without generating full PR text). - -Use this command when creating or reviewing Origin PRs that touch the Two Node test suite and you -want a focused, reproducible review of test design, helper usage, and CI integration. - -This is a specialized Origin review helper focused on Two Node tests and is intended as a building -block toward a future generic Origin review command. - -## Implementation - -The command should behave as follows. - -### 1. Argument handling - -Parse arguments from the invocation: - -- --url: - - Optional full PR URL (example: ) - - When provided, this takes precedence over any local git information. - -- (optional positional): - - Optional PR number (example: 30510) - -- --depth: - - quick: short, high-level summary - - full: detailed four-section output (default) - -Default behavior: - -- If --url is provided, use that PR. -- Else if is provided, use that PR in the current repo. -- Else infer the PR from the current git repository remote and branch name. -- Fail with a clear error message if the PR cannot be determined. - -### 2. Automatically discover relevant changes - -Assume the command is run inside a local checkout of the repo. - -- Determine changed files using git diff. -- Filter to Go files under test/extended/two_node/. -- Parse: - - Ginkgo Describe / Context / It blocks - - Suite tags - - [Serial] markers - - Helper imports - -### 3. Review test design and correctness - -For each test: - -- Validate alignment between intent and implementation. 
-- Validate degraded vs non-degraded behavior. -- Validate fencing vs arbiter semantics. -- Validate quorum, failover, and recovery expectations. - -Do not assume helper existence. Infer from imports and logic only. - -### 4. Suggest reuse of utilities and helpers - -Look for re-implemented logic where helpers already exist. - -Examples: - -- Origin utilities under github.com/openshift/origin/test/extended/util -- Kubernetes helpers under k8s.io/apimachinery and k8s.io/utils - -Call out: - -- Correct helper usage -- Missed reuse opportunities -- Duplication that should become shared Two Node helpers - -### 5. Evaluate structure and readability - -Review: - -- Describe / Context / It hierarchy -- By(...) usage -- Assertion clarity -- Avoidance of time.Sleep in favor of polling - -### 6. Recommend suite and Serial annotations - -- Prefer [Suite:openshift/two-node] for Two Node tests. -- Recommend [Serial] for: - - Cluster-scoped mutations - - Reboots - - Degradation or fencing actions - -- Recommend parallel for isolated, namespaced tests. - -Always explain why. - -### 7. Propose CI lane coverage - -- Determine if existing CI already covers the tests. -- If not, propose: - - Topology - - TEST_SUITE - - Feature gates - - Blocking vs periodic vs optional - -Do not hard-code lane names. - -### 8. Generate ready-to-paste text - -Produce: - -- Origin PR summary text -- Optional CI lane summary text (not a full release PR) - -The command is static and requires no cluster access. - ---- - -## Expected input - -/origin:two-node-origin-pr-helper --depth full -/origin:two-node-origin-pr-helper 30510 --depth full -/origin:two-node-origin-pr-helper --url --depth quick - ---- - -## Output structure - -Always respond in four sections: - -1. Summary of changes -2. Review of tests (design, logic, reuse) -3. Suite, Serial, and CI recommendations -4. 
Ready-to-paste text - -Respect --depth only: - -- quick → compact output -- full → detailed output - ---- - -## Example 1 — Degraded Two Node Fencing tests - -/origin:two-node-origin-pr-helper 30510 --depth full - ---- - -## Example 2 — Two Node Arbiter recovery tests - -/origin:two-node-origin-pr-helper --url --depth quick diff --git a/plugins/session/.claude-plugin/plugin.json b/plugins/session/.claude-plugin/plugin.json deleted file mode 100644 index 742a30080..000000000 --- a/plugins/session/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "session", - "description": "A plugin to save and resume conversation sessions across long time intervals", - "version": "0.0.2", - "author": { - "name": "github.com/kuiwang02" - } -} diff --git a/plugins/session/README.md b/plugins/session/README.md deleted file mode 100644 index c55ac2bb2..000000000 --- a/plugins/session/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Session Plugin - -Claude Code session management and persistence utilities. - -## Commands - -### `/session:save-session` - -Save the current conversation session to a markdown file for future continuation. - -This command captures the conversation context, allowing you to resume long-running tasks across multiple sessions. - -See [commands/save-session.md](commands/save-session.md) for full documentation. 
- -## Installation - -```bash -/plugin install session@ai-helpers -``` - diff --git a/plugins/session/commands/save-session.md b/plugins/session/commands/save-session.md deleted file mode 100644 index f93bc8867..000000000 --- a/plugins/session/commands/save-session.md +++ /dev/null @@ -1,138 +0,0 @@ ---- -description: Save current conversation session to markdown file for future continuation -argument-hint: "[optional-description]" ---- - -## Name -session:save-session - -## Synopsis - -``` -/save-session -/save-session [description] -``` - -## Description - -Saves the current conversation session to a comprehensive markdown file that enables seamless resumption of work after extended time intervals (days, weeks, or months). - -This command addresses limitations of Claude Code's built-in session management by capturing: -- Complete conversation context and technical rationale -- Detailed file modification tracking with line numbers -- Key technical decisions and alternatives considered -- Commands executed during the session -- Clear resumption instructions - -The generated session file is designed for engineers working across multiple projects with long gaps between sessions, providing all necessary context to continue work without losing momentum. 
- -## Implementation - -The command follows a five-phase process: - -### Phase 0: Input Sanitization -If a description argument is provided, sanitize it for safe filename usage: -- Convert all spaces to hyphens -- Convert to lowercase -- Remove or replace special characters (keep only alphanumeric, hyphens, and underscores) -- Truncate to 100 characters maximum if longer -- Example: "investigating OCPBUGS-12345 regarding routes" → "investigating-ocpbugs-12345-regarding-routes" - -### Phase 1: Context Analysis -- Summarizes main topics and goals discussed -- Lists all accomplishments and completed tasks -- Identifies all files that were read, modified, or created -- Extracts important technical decisions and their rationale -- Captures any error messages encountered and how they were resolved -- Notes any commands that were run (make, linter, tests, etc.) - -### Phase 2: File Modification Tracking -- Reads and verifies current state of modified files -- Lists specific line numbers and code changes -- Includes before/after comparisons for critical changes -- Notes which files were created vs modified vs deleted -- Tracks any generated files (like bindata) - -### Phase 3: Session File Creation -Creates a comprehensive markdown document with these sections: - -1. **Session Summary** - Brief 1-2 paragraph overview -2. **Current State** - Status of work and modifications -3. **Accomplishments** - Detailed completion checklist -4. **Files Modified** - Organized by Created/Modified/Deleted -5. **Key Technical Decisions** - Rationale and implications -6. **Pending Tasks** - Unfinished work (checkbox format) -7. **Commands Used** - All executed commands -8. **Context for Resumption** - Critical continuation information -9. **Full Conversation Summary** - Key discussion points -10. **Next Steps** - Clear action items -11. 
**How to Resume This Session** - Step-by-step guide - -### Phase 4: Verification and Output -- Confirms file was created successfully -- Displays file path and size -- Provides brief summary of what was saved -- Shows resumption instructions in terminal and saved file - -## Return Value - -Creates a markdown file in the repository root directory with filename: -- `session-YYYY-MM-DD-HHMM.md` (without description) -- `session-YYYY-MM-DD-.md` (with custom description) - -Terminal output: -``` -✅ Session saved successfully! - -File: session-YYYY-MM-DD-description.md (XX KB) -Location: /full/path/to/file - -📖 To resume this session: - Please read `/full/path/to/session-YYYY-MM-DD-description.md` and continue from where we left off -``` - -## Examples - -**Basic usage with auto-generated timestamp:** -``` -/save-session -``` -Creates: `session-2025-10-16-1430.md` - -**With custom description for easy identification:** -``` -/save-session parallel-test-fixes -``` -Creates: `session-2025-10-16-parallel-test-fixes.md` - -**Multiple sessions in one project:** -``` -/save-session initial-implementation -/save-session pr-review-feedback -/save-session final-testing -``` - -**With spaces and special characters (automatically sanitized):** -``` -/save-session investigating OCPBUGS-12345 regarding routes -``` -Creates: `session-2025-10-16-investigating-ocpbugs-12345-regarding-routes.md` - -**Resuming a saved session:** -Open Claude Code and say: -``` -Please read `/path/to/session-2025-10-16-parallel-test-fixes.md` and continue from where we left off -``` - -## Arguments - -**description** (optional) -- Custom identifier appended to the filename -- Helps identify the session purpose when resuming after long intervals -- **Input handling**: Description is automatically sanitized for safe filename usage (spaces converted to hyphens, special characters removed, truncated to 100 chars if needed) -- **Good examples**: `feature-name`, `bug-fix`, `refactoring`, 
`investigating-ocpbugs-12345` -- Automatically added to filename: `session-YYYY-MM-DD-.md` - -If no description is provided, timestamp alone is used: `session-YYYY-MM-DD-HHMM.md` - -**Note**: You can use spaces and special characters in your description - they will be automatically sanitized. For example, "investigating OCPBUGS-12345 regarding routes" becomes "investigating-ocpbugs-12345-regarding-routes". diff --git a/plugins/test-coverage/.claude-plugin/plugin.json b/plugins/test-coverage/.claude-plugin/plugin.json deleted file mode 100644 index 73f000c58..000000000 --- a/plugins/test-coverage/.claude-plugin/plugin.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "test-coverage", - "description": "Analyze code coverage and identify untested paths", - "version": "0.0.2", - "author": { - "name": "github.com/openshift-eng" - } -} diff --git a/plugins/test-coverage/README.md b/plugins/test-coverage/README.md deleted file mode 100644 index 114b1fea6..000000000 --- a/plugins/test-coverage/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Test Coverage Plugin - -Analyze e2e/integration test code structure without running tests to identify coverage gaps. - -## Commands - -### `/test-coverage:analyze` - -Analyze e2e/integration test code structure without running tests to identify files and functions without test coverage. - -### `/test-coverage:gaps` - -Identify missing test coverage in OpenShift/Kubernetes test files with intelligent gap analysis for any component. - -See the [commands/](commands/) directory for full documentation of each command. 
- -## Installation - -```bash -/plugin install test-coverage@ai-helpers -``` diff --git a/plugins/test-coverage/commands/analyze.md b/plugins/test-coverage/commands/analyze.md deleted file mode 100644 index cc17d2f79..000000000 --- a/plugins/test-coverage/commands/analyze.md +++ /dev/null @@ -1,358 +0,0 @@ ---- -description: Analyze test code structure without running tests to identify coverage gaps -argument-hint: [--output ] [--priority ] [--test-structure-only] ---- - -## Name -test-coverage:analyze - -## Synopsis -``` -/test-coverage:analyze [--output ] [--priority ] [--test-structure-only] -``` - -## Description - -The `test-coverage:analyze` command analyzes test code structure **without running tests**. This command examines test files and source files to identify: -- What e2e/integration tests exist in the codebase -- What source code has corresponding tests -- What source code lacks tests -- Test organization and coverage gaps - -**Focus on E2E Tests:** By default, this command focuses on e2e (end-to-end) and integration tests, excluding unit tests. This ensures analysis targets higher-level test coverage gaps that validate real-world scenarios and system integration. - -**Language Support:** This command currently supports Go projects only. - -This command is the foundation for QE or Dev teams to understand their e2e test coverage baseline and identify areas requiring additional testing. 
- -## Arguments - -- ``: Path or URL to source code directory/file to analyze - - **Local path**: `./pkg/`, `/home/user/project/test/e2e/networking/infw.go` - - **GitHub URL**: `https://github.com/owner/repo/blob/main/test/file_test.go` - - **GitHub raw URL**: `https://raw.githubusercontent.com/owner/repo/main/test/file_test.go` - - **GitLab URL**: `https://gitlab.com/owner/repo/-/blob/main/test/file_test.go` - - **HTTP(S) URL**: Any direct file URL - - URLs are automatically downloaded and cached in `.work/test-coverage/cache/` - -- `--output `: Output directory for generated reports (default: `.work/test-coverage/analyze/`) - - Generates HTML report, JSON summary, and text summary - -- `--priority `: Filter results by priority (optional) - - Values: `all`, `high`, `medium`, `low` - - Default: `all` - -- `--include-test-utils`: Include test utility/helper files in analysis (optional) - - By default, utility files are excluded (*_util.go, *_utils.go, *_helper.go, helpers.go, etc.) - - Use this flag to analyze test utility functions for e2e test coverage - - Useful for auditing test infrastructure code - -- `--include-unit-tests`: Include unit tests in analysis (optional) - - By default, only e2e/integration tests are analyzed - - Use this flag to include unit tests in the coverage analysis - - E2E tests are identified by: - - File naming patterns (e.g., *e2e*_test.go, *integration*_test.go) - - Directory location (e.g., test/e2e/, test/integration/) - - Test markers (e.g., [Serial], [Disruptive] for Ginkgo) - -- `--test-pattern `: Custom test file pattern (optional) - - Example: `--test-pattern "**/*_test.go,**/test_*.go"` - -- `--test-structure-only`: Analyze only test file structure, skip source file analysis (optional) - - When enabled, analyzes ONLY test files to document what tests exist and what they cover - - Does NOT look for corresponding source files or identify coverage gaps - - Useful for: - - Understanding test organization and structure - - 
Documenting test cases in existing test files - - Quick analysis of a single test file or test directory - - Generating test documentation (test cases, helper functions, resource types) - - Generates reports focused on test structure: - - Test cases and their purpose - - Helper functions used in tests - - Resource types and test utilities - - Protocol/feature coverage matrices - - Much faster than full coverage analysis - - Example: `/test-coverage:analyze ./test/extended/networking/infw.go --test-structure-only` - -## Implementation - -The command uses test structure analysis (backend) to analyze test files and source files without running tests. - -**Two Analysis Modes:** - -1. **Full Coverage Analysis (default):** Analyzes both test files AND source files to identify coverage gaps - - Maps tests to source code - - Identifies untested files and functions - - Generates coverage gap reports - - Follows Steps 1-8 below - -2. **Test Structure Only (--test-structure-only):** Analyzes ONLY test files to document their structure - - Extracts test cases, helper functions, resource types - - Documents what each test covers - - Does NOT analyze source files or identify gaps - - Much faster, useful for test documentation - - Follows Steps 1, 2 (test files only), 3, 7 (test-focused reports) - -### Step 1: Resolve Input - -1. **Resolve input path or URL**: - - If input is a URL: - - Convert GitHub/GitLab blob URLs to raw URLs - - Download file to `.work/test-coverage/cache/` - - Use cached version if already downloaded - - Extract to temporary directory if it's an archive (zip, tar.gz) - - If input is a local path: - - Convert to absolute path - - Validate that path exists and is readable -2. Load Go-specific configuration (test patterns, source patterns, parsers) - -### Step 2: Discover Test and Source Files - -**Note:** If `--test-structure-only` is used, only test files are discovered; source file discovery is skipped. - -1. 
Walk directory tree, excluding common directories: - - `vendor/`, `node_modules/`, `__pycache__/`, `.git/`, etc. - - Unit test directories (e.g., `test/unit/`, `unit/`) unless `--include-unit-tests` is specified -2. Identify e2e/integration test files based on: - - **File naming patterns:** - - `*e2e*_test.go`, `*integration*_test.go` - - **Directory location:** - - `test/e2e/`, `test/integration/`, `e2e/`, `integration/` - - **Content markers (if file can be read):** - - Ginkgo markers: `[Serial]`, `[Disruptive]`, `g.Describe(`, `g.It(` -3. Identify source files based on language patterns -4. Filter out test utility/helper files unless `--include-test-utils` is specified: - - `*_util.go`, `*_utils.go`, `*_helper.go`, `helpers.go` -5. Apply `--exclude` patterns if specified - -### Step 3: Parse Test Files - -For each test file: -1. Extract test functions using Go patterns: - - Functions matching `func Test*`, `func Benchmark*` -2. Extract imports to identify tested modules -3. Extract function calls within tests (potential test targets) -4. Infer corresponding source file: - - `handler_test.go` → `handler.go` - -### Step 4: Parse Source Files - -**Note:** This step is skipped when `--test-structure-only` is used. - -For each source file: -1. Extract functions/methods using Go patterns -2. Determine function visibility: - - Exported (capitalized) vs private (lowercase) -3. Calculate cyclomatic complexity (count decision points) -4. Record line ranges for each function - -### Step 5: Map Tests to Source Code - -**Note:** This step is skipped when `--test-structure-only` is used. - -1. Create mapping between test files and source files -2. For each function in source files: - - Find tests that reference this function - - Mark as tested/untested - - Count number of tests covering it -3. 
Calculate file-level statistics: - - Total functions - - Tested functions - - Untested functions - - Function coverage percentage - -### Step 6: Identify Coverage Gaps - -**Note:** This step is skipped when `--test-structure-only` is used. - -1. **Untested Files**: Source files with no corresponding test file - - Priority: High if file has exported/public functions -2. **Untested Functions**: Functions not referenced in any tests - - Priority: High for exported/public, Low for private -3. **Partially Tested Files**: Files with some but not all functions tested - - Priority: Based on percentage of untested functions -4. Apply `--priority` filter if specified - -### Step 7: Generate Reports - -**In test-structure-only mode**, generates test-focused reports: -- `test-structure-report.html` - Interactive report with test cases, helper functions, resource types -- `test-structure-analysis.json` - Machine-readable test metadata -- `test-structure-summary.txt` - Terminal-friendly test summary - -**In full coverage mode**, generates coverage gap reports: - -1. **HTML Report** (`test-coverage-report.html`): - - Interactive web-based report combining structure summary and gaps - - Includes collapsible sections for test cases, helper functions, and gaps - - Color-coded priority indicators (High=red, Medium=yellow, Low=blue) - - Coverage matrices for protocols and IP stacks - - Filterable gaps by priority - - Recommendations section - - Best viewed in a web browser - -2. **JSON Report** (`test-structure-report.json`): - - Complete gap data with file paths, functions, priorities - - Machine-readable format for automation and CI/CD integration - -3. **Text Summary** (`test-structure-summary.txt`): - - Overall statistics (files with/without tests, function coverage) - - High-priority gaps - - Recommendations - - Plain text format for console viewing - -4. 
**Console Output**: - - Summary of findings - - Paths to all generated reports - -### Step 8: Invoke Test Structure Analyzer - -**Invoke the analyze skill** to generate analyzer script at runtime and execute analysis. The skill will: -- Generate the analyzer from the specification in SKILL.md -- Execute analysis on the source directory -- Generate all three report formats (HTML, JSON, Text) - -## Return Value - -- **Format**: Terminal output with summary + generated report files - -**Terminal Output:** -``` -Test Structure Analysis Complete - -Summary: - Total Source Files: 45 - Files With Tests: 30 (66.7%) - Files Without Tests: 15 (33.3%) - - Total Functions: 234 - Tested Functions: 189 (80.8%) - Untested Functions: 45 (19.2%) - -High Priority Gaps: - 1. pkg/config.go - No test file (3 exported functions) - 2. pkg/validator.go - No test file (5 exported functions) - 3. cmd/server/auth.go - Partially tested (4/8 functions) - -Reports Generated: - HTML Report: .work/test-coverage/analyze/test-coverage-report.html - JSON Report: .work/test-coverage/analyze/test-structure-report.json - Text Summary: .work/test-coverage/analyze/test-structure-summary.txt - -Recommendations: - - Create test files for 15 untested source files - - Add tests for 45 untested functions - - Focus on high-priority gaps first -``` - -**Exit Status:** -- 0: Analysis successful -- 2: Analysis failed (parsing error, invalid input) - -## Examples - -### Example 1: Analyze e2e test structure without running tests (Go project) - -``` -/test-coverage:analyze ./pkg/ -``` - -Analyzes e2e/integration test file structure for a Go project to identify untested functions and files without running any tests. Unit tests are excluded by default. - -### Example 2: Analyze test structure with high priority filter - -``` -/test-coverage:analyze ./pkg/ --priority high -``` - -Analyzes Go test structure and shows only high-priority gaps (files without tests, untested public functions). 
- -### Example 3: Analyze with custom output directory - -``` -/test-coverage:analyze ./pkg/ --output reports/test-gaps/ -``` - -Analyzes test structure and generates reports in custom output directory. - -### Example 4: Analyze only test file structure (single file) - -``` -/test-coverage:analyze ./test/extended/networking/infw.go --test-structure-only -``` - -Analyzes ONLY the test file structure without looking for source files. Generates documentation showing: -- All test cases and what they cover -- Helper functions used in tests -- Resource types and test utilities -- Coverage matrices (protocols, IP stacks, platforms) - -Useful for quickly understanding what a test file covers without analyzing source code. - -### Example 5: Analyze test directory structure only - -``` -/test-coverage:analyze ./test/e2e/ --test-structure-only -``` - -Analyzes all test files in the e2e directory to document the test suite structure without source file analysis. - -### Example 6: Analyze remote test file from GitHub - -``` -/test-coverage:analyze https://github.com/openshift/origin/blob/master/test/extended/networking/infw.go --test-structure-only -``` - -Downloads and analyzes a test file directly from GitHub. The command automatically converts the GitHub blob URL to a raw URL and caches the download. - -### Example 7: Analyze remote test file using raw URL - -``` -/test-coverage:analyze https://raw.githubusercontent.com/openshift/origin/master/test/extended/networking/infw.go --test-structure-only -``` - -Analyzes a test file using the raw GitHub URL directly. - -### Example 8: Analyze test file with forced re-download - -To force re-downloading a cached URL, simply delete the cache and run again: - -``` -rm -rf .work/test-coverage/cache/ -/test-coverage:analyze https://github.com/user/repo/blob/main/test/file_test.go --test-structure-only -``` - -## Prerequisites - -### Python Dependencies - -The command uses Python for parsing and report generation. 
No external packages are required - only standard library modules are used. - -### Checking Prerequisites - -The command will automatically check for Python 3.8+ and provide installation instructions if missing. - -## Notes - -- **URL Support:** The command accepts both local paths and URLs (GitHub, GitLab, or any HTTP(S) URL) - - Remote files are automatically detected, downloaded, and cached - - Downloaded files are cached in `.work/test-coverage/cache/` for reuse - - GitHub blob URLs are automatically converted to raw URLs - - Clear cache with `rm -rf .work/test-coverage/cache/` to force re-download -- **E2E Focus:** By default, this command focuses on e2e/integration tests. Use `--include-unit-tests` to include unit tests. -- **Two Modes:** Use `--test-structure-only` to analyze only test files (fast, for documentation), or omit it for full coverage gap analysis -- This command analyzes test structure without running tests, making it very fast -- Works even if tests are broken or code doesn't compile -- Useful for identifying e2e test coverage gaps during development and code review -- HTML report provides interactive visualization of e2e test coverage and gaps -- JSON output enables integration with CI/CD pipelines and dashboards -- Text summary is ideal for console viewing and quick reference -- **Test Structure Only mode** is perfect for: - - Documenting existing test suites - - Understanding what a test file covers - - Quick analysis of a single test file - - Generating test case reports for review - -## See Also - -- `/test-coverage:gaps` - Identify untested code paths with priority-based analysis diff --git a/plugins/test-coverage/commands/gaps.md b/plugins/test-coverage/commands/gaps.md deleted file mode 100644 index ccaed7cf6..000000000 --- a/plugins/test-coverage/commands/gaps.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -description: Identify E2E test scenario gaps in OpenShift/Kubernetes tests (component-agnostic) -argument-hint: [--output ] ---- - -## 
Name -test-coverage:gaps - -## Synopsis -```bash -/test-coverage:gaps <test-file-or-url> [--output <path>] -``` - -## Description - -The `test-coverage:gaps` command **intelligently analyzes OpenShift/Kubernetes test files to identify missing test coverage**. It is **component-agnostic** and works for any OpenShift/K8s component (networking, storage, ETCD, Kube API, operators, etc.). **This command always generates three report formats: HTML (interactive), JSON (machine-readable), and Text (terminal-friendly).** - -**Component-Agnostic Analysis** (works for all OpenShift/K8s components): -- **Platform Coverage**: Which platforms (AWS, Azure, GCP, vSphere, Bare Metal, etc.) lack tests -- **Scenario Coverage**: Missing error handling, upgrade, security/RBAC, scale, performance tests -- **Priority-based recommendations**: Focus on high-impact gaps first -- **Component detection**: Automatically detects component type (networking, storage, kube-api, etcd, etc.) for informational purposes - -**Supported Components:** -- Networking (ingress, egress, SDN, OVN, network policies) -- Storage (volumes, storage classes, CSI, PV/PVC) -- Kube API, ETCD -- Auth/RBAC, OAuth -- Operators, controllers -- Observability, monitoring -- Image registry, builds -- Any other OpenShift/K8s component - -**Language Support:** This command currently supports Go projects only. - -This command helps OpenShift/Kubernetes QE or Dev teams focus testing efforts on the most critical untested scenarios.
- -## Arguments - -- `$1` (test-file-or-url): Path or URL to OpenShift/Kubernetes test file - - **Local path**: `./test/extended/networking/ingress.go`, `/path/to/storage_test.go` - - **GitHub URL**: `https://github.com/openshift/origin/blob/master/test/extended/storage/volume.go` - - **URL**: Any HTTP(S) URL to a test file - - URLs are automatically downloaded and cached in `.work/test-coverage/cache/` - -### Optional Arguments - -- `--output `: Output directory for gap analysis reports (default: `.work/test-coverage/gaps/`) - -## Implementation - -### Step 1: Resolve Test File Input - -1. **Resolve test file path or URL**: - - If input is a URL: - - Download test file to `.work/test-coverage/cache/` - - Use cached version if already downloaded - - If input is a local path: - - Convert to absolute path and validate existence - - Verify file is a Go test file (contains `g.It`, `g.Describe`, or `Test` functions) - -### Step 2: Detect Component Type and Parse Test File - -1. **Detect component type** from file path and content: - - **Networking**: `/networking/`, network policy, ingress, egress patterns - - **Storage**: `/storage/`, volume, PV, PVC, storage class patterns - - **KAPI**: `/kapi/`, `/api/`, apiserver patterns - - **Auth**: `/auth/`, RBAC, OAuth patterns - - **Generic**: Fallback for unrecognized components - -2. **Extract test cases** using regex patterns: - - Ginkgo tests: `g.It(`, `g.Describe(`, `g.Context(` - - Standard Go tests: `func Test*` - - Extract test metadata from names (priority, bug IDs, tags) - -3. **Analyze component-specific coverage**: - - **For Networking**: Protocols, service types, IP stacks - - **For Storage**: Storage classes, volume modes, provisioners - - **For All Components**: Platforms, scenarios - -4. **Build coverage matrices**: - - Track component-specific dimensions - - Track platform coverage - - Track scenario coverage (error handling, upgrades, RBAC, scale) - -### Step 3: Identify Component-Aware Gaps - -1. 
**Compare tested vs. expected**: - - For each component-specific dimension, identify what's not tested - - Categorize gaps by priority based on production importance - -2. **Calculate priority scores** (component-specific): - - **High Priority**: - - Major cloud providers (AWS, Azure, GCP) - - Core component features (protocols for networking, storage classes for storage) - - Error handling scenarios - - Operator upgrades - - **Medium Priority**: - - Secondary platforms (Bare Metal, OpenStack) - - RBAC, scale, performance scenarios - - **Low Priority**: - - Edge case scenarios - -3. **Generate component-aware recommendations**: - - For each gap, provide specific test recommendation - - Estimate impact of gap - - Suggest test case to fill gap - -### Step 4: Generate Reports - -**Invoke the gaps skill** to generate analyzer script at runtime and produce all three report formats: - -1. **HTML Report** (`test-gaps-report.html`): - - Coverage scores dashboard - - What's tested vs. not tested matrices - - Priority-sorted gap list with recommendations - - Visual charts for protocol/platform coverage - -2. **JSON Report** (`test-gaps-report.json`): - - Test case metadata - - Coverage matrices - - Gap list with priorities - - Machine-readable for CI/CD - -3. **Text Summary** (`test-gaps-summary.txt`): - - Coverage percentages - - High priority gaps - - Recommendations - - Terminal-friendly format - -## Return Value - -- **Format**: Terminal output with summary + generated report files - -**Terminal Output (Networking Component Example):** -```text -Detected component: networking - -Test Coverage Gap Analysis Complete - -Summary: - Test Cases: 15 - Overall Coverage: 20.8% - -Coverage Scores: - Protocol Coverage: 0.0% - Platform Coverage: 83.3% - Service Type: 0.0% - Scenario Coverage: 0.0% - -High Priority Gaps (5): - 1. TCP - Most common protocol not tested - 2. UDP - Common protocol for DNS, streaming not tested - 3. LoadBalancer - External traffic not tested - 4. 
Error handling - Invalid configs not validated - 5. Operator upgrades - Upgrade path not tested - -Reports Generated: - ✓ HTML: .work/test-coverage/gaps/test-gaps-report.html - ✓ JSON: .work/test-coverage/gaps/test-gaps-report.json - ✓ Text: .work/test-coverage/gaps/test-gaps-summary.txt - -Recommendation: - Add 5-7 test cases to address high-priority gaps - Target: Improve coverage from 21% to 41% -``` - -**Terminal Output (Storage Component Example):** -```text -Detected component: storage - -Test Coverage Gap Analysis Complete - -Summary: - Test Cases: 12 - Overall Coverage: 35.0% - -Coverage Scores: - Storage Class Coverage: 33.3% - Volume Mode Coverage: 66.7% - Platform Coverage: 50.0% - Scenario Coverage: 20.0% - -High Priority Gaps (4): - 1. gp2/gp3 - AWS EBS storage not tested - 2. CSI - CSI drivers not tested - 3. ReadWriteOnce - Single-node write access not tested - 4. Error handling - Invalid configs not validated -``` - -**Exit Status:** -- 0: Analysis successful -- 1: Analysis failed (parsing error, missing file) - -## Examples - -### Example 1: Analyze networking test file - -```bash -/test-coverage:gaps ./test/extended/networking/egressip_udn.go -``` - -Detects networking component and analyzes protocol coverage (TCP, UDP, SCTP), service types, and scenarios. - -**Output:** Component: networking, 20.8% overall coverage, identifies gaps in TCP/UDP protocols, LoadBalancer service type, error handling, operator upgrades. - -### Example 2: Analyze storage test file - -```bash -/test-coverage:gaps ./test/e2e/storage/csi.go -``` - -Detects storage component and analyzes storage class coverage, volume modes, and provisioners. - -**Output:** Component: storage, identifies gaps in gp2/gp3 storage classes, ReadWriteMany volume mode, CSI drivers. 
- -### Example 3: Analyze remote test file - -```bash -/test-coverage:gaps https://github.com/openshift/origin/blob/master/test/extended/storage/volume.go -``` - -Downloads test file from GitHub and analyzes component-specific coverage gaps. - -### Example 4: Custom output directory - -```bash -/test-coverage:gaps ./test/e2e/auth/rbac.go --output ./reports/e2e-gaps/ -``` - -Generates component-aware gap reports in a custom directory. - -## Prerequisites - -**General Requirements**: -- Python 3.8+ -- Access to test files -- Go test files (Ginkgo or standard Go tests) - -### Checking Prerequisites - -The command will check for required tools and suggest installation if missing. - -## Notes - -### General - -- **Test Scenario Analysis**: This command identifies missing test scenarios, platforms, and protocols in your e2e test suite -- **CRITICAL**: This command MUST always generate all three report formats (HTML, JSON, and Text). Failing to generate any report format should be treated as a command failure.
-- **URL Support:** Test files can be URLs - - Supports GitHub, GitLab, and any HTTP(S) URLs - - Downloaded files are cached in `.work/test-coverage/cache/` - - GitHub blob URLs are automatically converted to raw URLs - - Clear cache with `rm -rf .work/test-coverage/cache/` to force re-download - -### Component-Aware Gap Analysis Notes - -- **Context-Aware Analysis**: The tool automatically detects component type and provides relevant recommendations -- **Component Types Supported**: - - **Networking**: Analyzes protocols, service types, IP stacks - - **Storage**: Analyzes storage classes, volume modes, provisioners - - **Generic**: Analyzes platforms and scenarios for unrecognized components -- **Focus on Production Readiness**: Gaps highlight missing scenarios that could impact production deployments -- **Platform Coverage Critical**: Missing tests for major cloud providers (AWS, Azure, GCP) are production blockers -- **Component-Specific Coverage**: Each component type has specific dimensions analyzed (protocols for networking, storage classes for storage, etc.) 
-- **Scenario Coverage**: Error handling, upgrades, RBAC, and scale tests are often overlooked but critical -- **Coverage Scores**: Overall coverage below 50% indicates significant e2e testing gaps -- Re-run after adding test cases to track improvement in component-specific coverage - -### Report Format Notes - -- The HTML report provides the best interactive experience with expandable details, sortable tables, and visual charts -- The JSON report is ideal for CI/CD integration and automated issue creation -- The Text report is useful for email summaries and terminal display -- JSON output can be integrated with issue tracking systems to create testing tasks -- Re-run this command after adding tests to measure progress - -## See Also - -- `/test-coverage:analyze` - Analyze test structure and organization diff --git a/plugins/test-coverage/skills/analyze/README.md b/plugins/test-coverage/skills/analyze/README.md deleted file mode 100644 index cb0712092..000000000 --- a/plugins/test-coverage/skills/analyze/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Test Structure Analysis Skill - -Analyze test code structure without running tests to identify coverage gaps. - -## Usage - -This skill is invoked via the `/test-coverage:analyze` command: - -```bash -/test-coverage:analyze -``` - -## What It Does - -- Analyzes test file structure (Go projects only) -- Identifies e2e and integration tests -- Finds files and functions without tests -- Generates HTML, JSON, and text reports - -## Output - -All reports are generated in `.work/test-coverage/analyze/`: -- `test-structure-report.html` - Interactive report -- `test-structure-report.json` - Machine-readable data -- `test-structure-summary.txt` - Terminal-friendly summary - -See [SKILL.md](SKILL.md) for detailed implementation guide. 
diff --git a/plugins/test-coverage/skills/analyze/SKILL.md b/plugins/test-coverage/skills/analyze/SKILL.md deleted file mode 100644 index a3e0d9646..000000000 --- a/plugins/test-coverage/skills/analyze/SKILL.md +++ /dev/null @@ -1,699 +0,0 @@ ---- -name: Test Structure Analysis -description: Analyze test code structure directly to provide coverage analysis ---- - -# Test Structure Analysis Skill - -This skill provides the ability to analyze test code structure **directly from test files** without running tests. It examines test files and source files to identify what is tested and what is not. - -## When to Use This Skill - -Use this skill when you need to: -- Analyze test code organization without running tests -- Identify files and functions without tests -- Understand what e2e/integration tests cover -- Find coverage gaps by examining test structure -- Generate comprehensive test structure reports -- Fast analysis (seconds, not minutes) - -## Prerequisites - -### Required Tools - -- **Python 3.8+** for test structure analysis -- **Go toolchain** for the target Go project - -### Installation - -```bash -# Ensure Python 3.8+ is installed -python3 --version - -# Go toolchain for target project -go version -``` - -## How It Works - -**Note: This skill currently supports Go projects only.** - -### Step 1: Discover Test and Source Files - -The analyzer discovers test and source files based on Go conventions: - -**Test Files:** -- Files ending with `_test.go` -- E2E/integration tests identified by: - - File naming patterns: `*e2e*_test.go`, `*integration*_test.go` - - Directory location: `test/e2e/`, `test/integration/`, `e2e/`, `integration/` - - Content markers: Ginkgo markers like `[Serial]`, `[Disruptive]`, `g.Describe(`, `g.It(` - -**Source Files:** -- Files ending with `.go` (excluding test files) -- Optionally exclude vendor, generated code, etc. - -### Step 2: Parse Test Files - -For each test file, extract: - -1. 
**Test functions/methods**: - - Function name - - Line number range - - Test framework (Go testing, Ginkgo) - - Test type (unit, integration, e2e) - -2. **Test targets** (what the test is testing): - - Imports and references to source files - - Function calls and instantiations - - Inferred from test names - -3. **Test metadata**: - - Test descriptions/documentation - - Test tags/markers - - Helper functions - -**Example for Go:** -```go -// File: pkg/handler_test.go -package handler_test - -import ( - "testing" - "myapp/pkg/handler" -) - -func TestHandleRequest(t *testing.T) { // ← Test function - h := handler.New() // ← Target: handler.New - result := h.HandleRequest("test") // ← Target: handler.HandleRequest - // ... -} -``` - -**Extraction result:** -```json -{ - "test_file": "pkg/handler_test.go", - "source_file": "pkg/handler.go", - "tests": [ - { - "name": "TestHandleRequest", - "lines": [6, 10], - "targets": ["handler.New", "handler.HandleRequest"], - "type": "unit" - } - ] -} -``` - -### Step 3: Parse Source Files - -For each source file, extract: - -1. **Functions/methods**: - - Function name - - Line number range - - Visibility (public/private/exported) - - Parameters and return types - - Complexity metrics - -2. 
**Classes/structs**: - - Type definitions - - Methods - - Fields - -**Example for Go:** -```go -// File: pkg/handler.go -package handler - -type Handler struct { - config Config -} - -func New() *Handler { // ← Function: New - return &Handler{} -} - -func (h *Handler) HandleRequest(req string) (string, error) { // ← Function: HandleRequest - if req == "" { - return "", errors.New("empty request") - } - return process(req), nil -} -``` - -**Extraction result:** -```json -{ - "source_file": "pkg/handler.go", - "functions": [ - { - "name": "New", - "lines": [8, 10], - "visibility": "exported", - "complexity": 1 - }, - { - "name": "HandleRequest", - "lines": [12, 20], - "visibility": "exported", - "complexity": 3, - "receiver": "Handler" - } - ] -} -``` - -### Step 4: Map Tests to Source Code - -Create a mapping between tests and source code: - -1. **Direct mapping** (test file → source file): - - `handler_test.go` → `handler.go` - -2. **Function-level mapping** (test → function): - - `TestHandleRequest` tests `HandleRequest` - -3. **Import-based mapping**: - - Analyze imports in test files to identify tested modules - -**Mapping result:** -```json -{ - "pkg/handler.go": { - "test_file": "pkg/handler_test.go", - "functions": { - "New": { - "tested": true, - "tests": ["TestHandleRequest"], - "test_count": 1 - }, - "HandleRequest": { - "tested": true, - "tests": ["TestHandleRequest"], - "test_count": 1 - } - }, - "overall_tested_functions": 2, - "overall_untested_functions": 0, - "function_test_coverage": 100.0 - } -} -``` - -### Step 5: Identify Coverage Gaps - -Identify what is **not tested**: - -1. **Untested source files**: - - Source files with no corresponding test file - - Priority: Based on file importance (exported functions) - -2. **Untested functions**: - - Functions not referenced in any tests - - Priority: Exported/public functions > private functions - -3. 
**Partially tested files**: - - Files with test file but missing tests for some functions - -**Gap categorization:** - -```json -{ - "gaps": { - "untested_files": [ - { - "file": "pkg/config.go", - "functions": 5, - "exported_functions": 3, - "priority": "high", - "reason": "No corresponding test file found" - } - ], - "untested_functions": [ - { - "file": "pkg/handler.go", - "function": "process", - "visibility": "private", - "priority": "low", - "reason": "Not referenced in any tests" - } - ] - }, - "summary": { - "total_source_files": 45, - "files_with_tests": 30, - "files_without_tests": 15, - "total_functions": 234, - "tested_functions": 189, - "untested_functions": 45, - "function_coverage_percentage": 80.8 - } -} -``` - -### Step 6: Generate Reports - -**IMPORTANT:** Claude Code generates all three report formats at runtime based on the analyzer's structured output. The analyzer script returns structured data (as JSON to stdout or via Python data structures), and Claude Code is responsible for generating all report files. - -The analyzer generates structured data containing full analysis results. Claude Code reads this data and generates three report formats: - -#### 1. JSON Report (`test-structure-report.json`) - -**Generated by:** Claude Code at runtime based on analyzer output - -Machine-readable format containing full analysis data. See Step 5 for structure. - -**How to generate:** -- Read structured data from analyzer (returned as JSON to stdout) -- Write to JSON file with `indent=2` for readability - -#### 2. 
Text Summary (`test-structure-summary.txt`) - -**Generated by:** Claude Code at runtime based on analyzer output - -Terminal-friendly summary showing: -- Overall statistics (files with/without tests, function coverage) -- High-priority gaps -- Recommendations - -**Format Structure:** -```text -============================================================ -Test Structure Analysis -============================================================ - -File: {filename} -Language: {language} -Analysis Date: {timestamp} - -============================================================ -Coverage Summary -============================================================ - -Total Source Files: {count} -Files With Tests: {count} ({percentage}%) -Files Without Tests: {count} ({percentage}%) - -Total Functions: {count} -Tested Functions: {count} ({percentage}%) -Untested Functions: {count} ({percentage}%) - -============================================================ -High Priority Gaps -============================================================ - -UNTESTED FILES: - 1. {filepath} - {reason} ({function_count} functions, {exported_count} exported) - ... - -UNTESTED FUNCTIONS: - 1. {filepath}::{function} - {reason} (visibility: {visibility}) - ... - -============================================================ -Recommendations -============================================================ - -Current Coverage: {current}% -Target Coverage: {target}% - -Focus on addressing HIGH priority gaps first to maximize -test coverage and ensure production readiness. -``` - -#### 3. HTML Report (`test-structure-report.html`) - -**Generated by:** Claude Code at runtime based on analyzer output - -Interactive HTML report with: - -**Required Sections:** - -1. **Header** with project info, language, and generation timestamp -2. **Summary Dashboard** with score cards showing: - - Total source files and files with/without tests - - Function coverage percentage - - High-priority gap count -3. 
**Untested Files Table** with columns: - - File path - - Function count - - Exported function count - - Priority (high/medium/low) -4. **Untested Functions Table** with columns: - - File path - - Function name - - Visibility (exported/private) - - Complexity score - - Priority -5. **Recommendations Section** grouped by priority - -**Styling:** -- Use the same CSS as gaps skill (modern gradient, cards, tables) -- Priority badges: high (red), medium (orange), low (blue) -- Escape all content with `html.escape()` - -## Implementation Steps - -When implementing this skill in a command: - -### Step 0: Generate Analyzer Script at Runtime - -**CRITICAL:** Before running any analysis, generate the analyzer script from the reference implementation. - -```bash -# Create output directory -mkdir -p .work/test-coverage/analyze/ - -# Generate the analyzer script from the specification below -# Claude Code will write test_structure_analyzer.py based on the Analyzer Specification section -``` - -**Analyzer Specification:** - -Generate a Python script (`test_structure_analyzer.py`) that analyzes Go test structure without running tests: - -**Input:** Path or URL to a Go test file or directory -**Output:** Structured JSON data printed to stdout, plus optional text summary to stderr - -**Core Algorithm:** - -0. **Input Processing** (handle URLs and local paths): - - Check if input starts with `http://` or `https://` - - If URL: Use `urllib.request.urlopen()` to fetch content, save to temp file - - If local path: Use directly - - After analysis: Clean up temp file if created - - Note: Directory URLs not supported, only single file URLs - -1. **File Discovery**: - - Test files: `*_test.go` (exclude vendor, generated code) - - E2E tests: Files in `test/e2e/`, `test/integration/`, or containing `[Serial]`, `[Disruptive]` markers - - Source files: `*.go` (exclude `*_test.go`, vendor) - -2. 
**Test Parsing** (regex-based): - - Ginkgo: `(?:g\.|o\.)?It\(\s*["']([^"']+)["']` → extract test name, line number - - Standard Go: `func (Test\w+)\(t \*testing\.T\)` → extract test function name - - Extract targets (functions called): regex for `\w+\([^)]*\)` inside test body - -3. **Source File Analysis**: - - Parse function definitions: `func (\w+)\(` or `func \(\w+ \*?\w+\) (\w+)\(` - - Track exported vs unexported (capitalized vs lowercase first letter) - -4. **Test-to-Source Mapping**: - - Convention: `handler_test.go` → `handler.go` - - Function-level: Match test names to source function names (e.g., `TestHandleRequest` → `HandleRequest`) - - Import analysis: Parse `import` statements in test files - -5. **Single File Mode** (when input is a file, not directory): - - Analyze only the test file structure - - Extract: test count, test names, imports, line numbers - - Skip source file mapping and gap detection - - Output: Test structure analysis only - -6. **Output Format** (JSON to stdout): -```json -{ - "language": "go", - "source_dir": "/path/to/src", - "test_only_mode": false, - "summary": { - "total_source_files": 45, - "total_test_files": 32, - "untested_files_count": 8 - }, - "test_file_details": { - "path": "/path/to/test.go", - "test_count": 15, - "tests": [ - {"name": "TestFoo", "line_start": 10, "line_end": 20, "targets": ["Foo", "Bar"]} - ], - "imports": ["testing", "github.com/onsi/ginkgo"] - } -} -``` - -**Command-line Interface:** -``` -python3 .work/test-coverage/analyze/test_structure_analyzer.py [--test-structure-only] [--output ] -``` - -**Why Runtime Generation:** -- Claude Code generates the analyzer from this specification -- No separate `.py` file to maintain -- SKILL.md is the single source of truth -- Simpler and more maintainable - -### Step 1: Validate Inputs - -Check that source directory exists and detect language if not specified. 
- -### Step 2: Execute Test Structure Analyzer - -```bash -# Run analyzer (outputs structured JSON to stdout) -python3 .work/test-coverage/analyze/test_structure_analyzer.py \ - \ - --priority \ - --output-json -``` - -The analyzer will output structured JSON to stdout containing: -- Test file analysis -- Source file analysis -- Test-to-source mappings -- Coverage gaps -- Summary statistics - -### Step 3: Generate All Three Report Formats at Runtime - -**IMPORTANT:** Claude Code generates all three report formats based on the analyzer's structured output. - -#### 3.1: Capture and Parse Analyzer Output - -```python -import json -import subprocess - -# Run analyzer and capture JSON output -result = subprocess.run( - ['python3', '.work/test-coverage/analyze/test_structure_analyzer.py', source_dir, '--output-json'], - capture_output=True, - text=True -) - -# Parse structured data -analysis_data = json.loads(result.stdout) -``` - -#### 3.2: Generate JSON Report - -```python -json_path = '.work/test-coverage/analyze/test-structure-report.json' -with open(json_path, 'w') as f: - json.dump(analysis_data, f, indent=2) -``` - -#### 3.3: Generate Text Summary Report - -Follow the text format specification in Step 6 to generate a terminal-friendly summary. - -```python -text_path = '.work/test-coverage/analyze/test-structure-summary.txt' -# Generate text content following format in Step 6 -with open(text_path, 'w') as f: - f.write(text_content) -``` - -#### 3.4: Generate HTML Report - -Follow the HTML specification in Step 6 to generate an interactive report. 
- -```python -html_path = '.work/test-coverage/analyze/test-structure-report.html' -# Generate HTML content following specification in Step 6 -with open(html_path, 'w') as f: - f.write(html_content) -``` - -### Step 4: Display Results - -Show summary and report locations to user: - -``` -Test Structure Analysis Complete - -Reports Generated: - ✓ HTML: .work/test-coverage/analyze/test-structure-report.html - ✓ JSON: .work/test-coverage/analyze/test-structure-report.json - ✓ Text: .work/test-coverage/analyze/test-structure-summary.txt -``` - -## ⚠️ MANDATORY PRE-COMPLETION VALIDATION - -**CRITICAL:** Before declaring this skill complete, you MUST execute ALL validation checks below. Failure to validate is considered incomplete execution. - -### Validation Checklist - -Execute these verification steps in order. ALL must pass: - -#### 1. File Existence Check - -```bash -# Verify all three reports exist -test -f .work/test-coverage/analyze/test-structure-report.html && echo "✓ HTML exists" || echo "✗ HTML MISSING" -test -f .work/test-coverage/analyze/test-structure-report.json && echo "✓ JSON exists" || echo "✗ JSON MISSING" -test -f .work/test-coverage/analyze/test-structure-summary.txt && echo "✓ Text exists" || echo "✗ Text MISSING" -``` - -**Required:** All three files must exist. If any are missing, regenerate them. - -#### 2. Test Case Extraction Verification - -```bash -# Verify test cases were extracted -python3 << 'EOF' -import json -try: - with open('.work/test-coverage/analyze/test-structure-report.json', 'r') as f: - data = json.load(f) - - test_count = data.get('summary', {}).get('test_cases_count', 0) - - if test_count > 0: - print(f"✓ Test cases extracted: {test_count}") - else: - print("✗ NO TEST CASES FOUND - verify test file contains Ginkgo tests") - exit(1) -except Exception as e: - print(f"✗ ERROR: {e}") - exit(1) -EOF -``` - -**Required:** Test cases must be extracted. Zero test cases indicates a parsing issue. - -#### 3. 
HTML Report Structure Verification - -```bash -# Verify HTML has required sections -grep -q "

Test Cases" .work/test-coverage/analyze/test-structure-report.html && \ - echo "✓ Test Cases section present" || \ - echo "✗ MISSING: Test Cases section" - -grep -q "

Coverage Summary" .work/test-coverage/analyze/test-structure-report.html && \ - echo "✓ Coverage Summary section present" || \ - echo "✗ MISSING: Coverage Summary section" -``` - -**Required:** HTML must have all structural sections. - -#### 4. JSON Structure Verification - -```bash -# Verify JSON has all required fields -python3 << 'EOF' -import json -try: - with open('.work/test-coverage/analyze/test-structure-report.json', 'r') as f: - data = json.load(f) - - required_fields = [ - ('summary.language', lambda d: d['summary']['language']), - ('summary.test_cases_count', lambda d: d['summary']['test_cases_count']), - ('test_cases', lambda d: d['test_cases']), - ] - - missing = [] - for name, getter in required_fields: - try: - getter(data) - print(f"✓ {name}") - except (KeyError, TypeError): - print(f"✗ MISSING: {name}") - missing.append(name) - - if not missing: - print("\n✓ All required JSON fields present") - else: - print(f"\n✗ INCOMPLETE: Missing {len(missing)} required fields") - exit(1) -except Exception as e: - print(f"✗ ERROR: {e}") - exit(1) -EOF -``` - -**Required:** All required JSON fields must be present. - -### Validation Summary - -**Before declaring this skill complete:** - -1. ✓ All three report files exist -2. ✓ Test cases successfully extracted (count > 0) -3. ✓ HTML has all required sections -4. ✓ JSON contains all required fields - -**If ANY check fails:** Fix the issue and re-run all validation checks. Do NOT declare the skill complete until ALL checks pass. - -## Error Handling - -### Common Issues and Solutions - -1. **Unable to parse test/source files**: - - Use fallback regex-based parsing - - Log warnings for unparseable files - - Continue with partial analysis - -2. **No test files found**: - - Check if test patterns are correct for the Go project - - Ensure test files follow `*_test.go` naming convention - -3. 
**Complex project structures**: - - Allow excluding certain directories via `--exclude` - -## Examples - -### Example 1: Go Project - Basic Analysis - -```bash -# Analyze test structure for Go project -python3 .work/test-coverage/analyze/test_structure_analyzer.py /path/to/go/project - -# Output: -# Language: go -# Discovered 45 source files, 32 test files -# Function coverage: 80.8% (189/234 functions tested) -# High priority gaps: 8 files without tests -``` - -### Example 2: Go Project with Filters - -```bash -# Analyze only high-priority gaps -python3 .work/test-coverage/analyze/test_structure_analyzer.py /path/to/go/project \ - --priority high \ - --exclude "*/vendor/*" \ - --output reports/test-gaps/ -``` - -### Example 3: Single Test File Analysis - -```bash -# Analyze single test file structure -python3 .work/test-coverage/analyze/test_structure_analyzer.py ./test/e2e/networking/infw.go \ - --test-structure-only \ - --output ./reports/ -``` - -## Integration with Claude Code Commands - -This skill is used by: -- `/test-coverage:analyze ` - -The command invokes this skill to perform test structure analysis without running tests. - -## See Also - -- [Test Coverage Plugin README](../../README.md) - User guide and installation diff --git a/plugins/test-coverage/skills/gaps/README.md b/plugins/test-coverage/skills/gaps/README.md deleted file mode 100644 index 3969e5e92..000000000 --- a/plugins/test-coverage/skills/gaps/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Test Scenario Gap Analysis Skill - -Identify missing test scenarios, platforms, protocols, and coverage gaps in e2e tests. - -## Usage - -This skill is invoked via the `/test-coverage:gaps` command: - -```bash -/test-coverage:gaps -``` - -## What It Does - -- Detects component type automatically -- Analyzes platform coverage (AWS, Azure, GCP, etc.) 
-- Checks protocol coverage (TCP, UDP, SCTP) -- Identifies scenario gaps (error handling, upgrades, RBAC) -- Assigns priority to gaps (high/medium/low) -- Generates HTML, JSON, and text reports - -## Output - -All reports are generated in `.work/test-coverage/gaps/`: -- `test-gaps-report.html` - Interactive, filterable report -- `test-gaps-report.json` - Machine-readable gap data -- `test-gaps-summary.txt` - Terminal-friendly summary - -See [SKILL.md](SKILL.md) for detailed implementation guide. diff --git a/plugins/test-coverage/skills/gaps/SKILL.md b/plugins/test-coverage/skills/gaps/SKILL.md deleted file mode 100644 index ccbea04b3..000000000 --- a/plugins/test-coverage/skills/gaps/SKILL.md +++ /dev/null @@ -1,1109 +0,0 @@ ---- -name: Component-Aware Test Gap Analysis -description: Intelligently identify missing test coverage based on component type ---- - -# Component-Aware Test Gap Analysis Skill - -This skill **automatically detects component type** (networking, storage, API, etc.) and provides **context-aware gap analysis**. It analyzes e2e test files to identify missing test coverage specific to the component being tested. - -## When to Use This Skill - -Use this skill when you need to: -- **Automatically detect component type** from test file path and content -- **Component-specific gap analysis**: - - **Networking**: Identify missing protocol tests (TCP, UDP, SCTP), service type coverage, IP stack testing - - **Storage**: Find gaps in storage class coverage, volume mode testing, provisioner tests - - **Generic**: Analyze platform coverage and common scenarios for other components -- **Always analyze**: Cloud platform coverage (AWS, Azure, GCP, etc.) 
and scenario testing (error handling, upgrades, RBAC, scale) -- Prioritize testing efforts based on component-specific production importance -- Generate comprehensive component-aware gap analysis reports - -## ⚠️ CRITICAL REQUIREMENT - -**This skill MUST ALWAYS generate all three report formats (HTML, JSON, and Text) at runtime.** - -The gap analyzer script (generated at runtime to `.work/test-coverage/gaps/gap_analyzer.py`) performs the analysis and returns structured data. Claude Code is responsible for generating all three report formats based on this data. - -**Required Actions:** -1. ✅ **Execute**: `python3 .work/test-coverage/gaps/gap_analyzer.py --output-json` (outputs structured JSON to stdout) -2. ✅ **Generate**: Create all three report files (HTML, JSON, Text) at runtime -3. ✅ **Verify**: All three reports are generated successfully -4. ✅ **Display**: Show report locations and summary to the user - -**Failure to generate any of the three report formats** should be treated as a skill execution failure. - -## Prerequisites - -### Required Tools - -- **Python 3.8+** for test structure analysis -- **Go toolchain** for the target project - -### Installation - -```bash -# Python dependencies (standard library only, no external packages required) -# Ensure Python 3.8+ is installed - -# Optional Go analysis tools -go install golang.org/x/tools/cmd/guru@latest -go install golang.org/x/tools/cmd/goimports@latest -``` - -## How It Works - -**Note: This skill currently supports E2E/integration test files for OpenShift/Kubernetes components written in Go (Ginkgo framework).** - -### Current Implementation - -The analyzer performs **single test file analysis** with two analysis layers: - -1. **Generic Coverage Analysis** (keyword-based) - - Platforms, protocols, IP stacks, service types - - Uses regex pattern matching on file content - -2. 
**Feature-Based Analysis** (runtime extraction) - - Dynamically extracts features from test names - - Infers missing features based on patterns - - No hardcoded feature matrices - works for ANY component - -It does **not** perform repository traversal, Go AST parsing, or test-to-source mapping. - -### Analysis Flow - -#### Step 1: Component Type Detection - -The analyzer automatically detects the component type from: - -1. **File path patterns**: - - `/networking/` → networking component - - `/storage/` → storage component - - `/kapi/`, `/api/` → kube-api component - - `/etcd/` → etcd component - - `/auth/`, `/rbac/` → auth component - -2. **File content patterns**: - - Keywords like `sig-networking`, `networkpolicy`, `egressip` → networking - - Keywords like `sig-storage`, `persistentvolume` → storage - - Keywords like `sig-api`, `apiserver` → kube-api - -#### Step 2: Extract Test Cases - -Parses the test file using regex to extract: - -- **Test names** from Ginkgo `g.It("test name")` patterns -- **Line numbers** where tests are defined -- **Test tags** like `[Serial]`, `[Disruptive]`, `[NonPreRelease]` -- **Test IDs** from patterns like `-12345-` in test names - -**Example:** -```go -g.It("egressip-12345-should work on AWS [Serial]", func() { - // Test implementation -}) -``` - -Extracted: -- Name: `egressip-12345-should work on AWS [Serial]` -- ID: `12345` -- Tags: `[Serial]` -- Line: 42 - -#### Step 3: Analyze Coverage Using Regex - -For each component type, the analyzer searches the file content for specific keywords to determine what is tested: - -**Networking components:** -- **Platforms**: `vsphere`, `AWS`, `azure`, `GCP`, `baremetal` -- **Protocols**: `TCP`, `UDP`, `SCTP` -- **Service types**: `NodePort`, `LoadBalancer`, `ClusterIP` -- **Scenarios**: `invalid`, `upgrade`, `concurrent`, `performance`, `rbac` - -**Storage components:** -- **Platforms**: `vsphere`, `AWS`, `azure`, `GCP`, `baremetal` -- **Storage classes**: `gp2`, `gp3`, `csi` -- **Volume 
modes**: `ReadWriteOnce`, `ReadWriteMany`, `ReadOnlyMany` -- **Scenarios**: `invalid`, `upgrade`, `concurrent`, `performance`, `rbac` - -**Other components:** -- **Platforms**: `vsphere`, `AWS`, `azure`, `GCP`, `baremetal` -- **Scenarios**: `invalid`, `upgrade`, `concurrent`, `performance`, `rbac` - -#### Step 4: Identify Gaps - -For each coverage dimension, if a keyword is **not found** in the file, it's flagged as a gap: - -**Example:** -```python -# If file content doesn't contain "azure" (case-insensitive) -gaps.append({ - 'platform': 'Azure', - 'priority': 'high', - 'impact': 'Major cloud provider - production blocker', - 'recommendation': 'Add Azure platform-specific tests' -}) -``` - -#### Step 5: Calculate Component-Aware Coverage Scores - -Scoring is component-specific to avoid penalizing components for irrelevant metrics: - -**Networking components:** -- Overall = avg(platform_score, protocol_score, service_type_score, scenario_score) - -**Storage components:** -- Overall = avg(platform_score, storage_class_score, volume_mode_score, scenario_score) - -**Other components:** -- Overall = avg(platform_score, scenario_score) - -Each dimension score = (items_found / total_items) × 100 - -#### Step 5a: Dynamic Feature Extraction (Runtime Analysis) - -In addition to the keyword-based coverage analysis above, the analyzer performs **dynamic feature extraction** to identify component-specific features from test names at runtime, without any hardcoded feature matrices. - -**How Runtime Feature Extraction Works:** - -1. 
**Extract Features from Test Names** - - Parse test names to identify features being tested: - - **Example Test Name:** - ``` - "Validate egressIP with mixed of multiple non-overlapping UDNs and default network(layer3/2 and IPv4 only)" - ``` - - **Extracted Features:** - - ✓ Non-overlapping configuration - - ✓ Multiple resource configuration - - ✓ Mixed configuration - - ✓ User Defined Networks (UDN) - - ✓ Default network - - ✓ Layer 3 networking - -2. **Group Features into Categories** - - Features are automatically categorized: - - - **Configuration Patterns**: overlapping, non-overlapping, single, multiple, mixed - - **Network Topology**: UDN, default network, layer2, layer3, gateway modes - - **Lifecycle Operations**: creation, deletion, recreation, assignment - - **Network Features**: failover, load balancing, isolation - - **Resilience & Recovery**: reboot, restart, node deletion - -3. **Infer Missing Features** - - Based on patterns, infer what's missing: - - - **Opposite patterns**: If "overlapping" tested → suggest "non-overlapping" - - **Single vs Multiple**: If "single resource" tested → suggest "multiple resources" - - **Completeness**: If "deletion" tested → suggest "recreation" - - **Layer coverage**: If "layer2" tested → suggest "layer3" - -**Benefits of Runtime Feature Extraction:** - -✅ **No Hardcoding** - Works for ANY component without configuration -✅ **Intelligent Gap Detection** - Infers missing features based on patterns -✅ **Component-Agnostic** - Automatically adapts to any component type -✅ **Always Current** - Extracts from actual test names, not assumed features - -**Example: EgressIP Test Analysis** - -**Input (Test Names):** -``` -1. Validate egressIP with mixed of multiple non-overlapping UDNs -2. Validate egressIP with mixed of multiple overlapping UDNs -3. Validate egressIP Failover with UDNs -4. egressIP after UDN deleted then recreated -5. egressIP after OVNK restarted -6. 
Traffic is load balanced between egress nodes -``` - -**Output (Extracted Features):** -``` -Configuration Patterns: - ✓ Non-overlapping configuration - ✓ Overlapping configuration - ✓ Multiple resource configuration - ✓ Mixed configuration - -Network Topology: - ✓ User Defined Networks (UDN) - -Lifecycle Operations: - ✓ Resource deletion - ✓ Resource recreation - -Network Features: - ✓ Failover - ✓ Load balancing - -Resilience & Recovery: - ✓ OVN-Kubernetes restart -``` - -**Output (Inferred Feature Gaps):** -``` -[HIGH] Single resource configuration - - Pattern suggests "multiple" tested but not "single" - - Recommendation: Add single resource baseline tests - -[HIGH] Layer 2 networking - - Layer 3 tested but Layer 2 missing - - Recommendation: Add Layer 2 network topology tests - -[MEDIUM] Local gateway mode - - Gateway mode mentioned but local vs shared not clear - - Recommendation: Add explicit gateway mode tests -``` - -**Integration in gap_analyzer.py:** - -The dynamic feature extractor is built into the analyzer (no separate import needed): - -```python -# After extracting test cases -feature_analysis = extract_features_from_tests(test_cases) - -# Results included in analysis output -tested_features = feature_analysis['tested_features'] -# {'Configuration Patterns': ['Overlapping', 'Non-overlapping', ...], -# 'Network Topology': ['UDN', 'Layer3', ...]} - -feature_gaps = feature_analysis['feature_gaps'] -# [{'feature': 'Multiple resources', 'priority': 'high', ...}] - -coverage_stats = feature_analysis['coverage_stats'] -# {'features_tested': 14, 'features_missing': 5} -``` - -**Report Integration:** - -Feature analysis is included in all three report formats: - -- **HTML Reports**: Feature sections with tested/missing features -- **Text Reports**: Feature lists grouped by category -- **JSON Reports**: Structured feature data for CI/CD integration - -### Limitations - -The current implementation has the following limitations: - -❌ **No repository traversal** 
- Analyzes only the single test file provided as input -❌ **No Go AST parsing** - Uses regex pattern matching instead of parsing Go syntax trees -❌ **No test-to-source mapping** - Cannot map test functions to source code functions -❌ **No function-level coverage** - Cannot determine which source functions are tested -❌ **No project-wide analysis** - Cannot analyze multiple test files or aggregate results -❌ **Keyword-based detection only** - Gap detection relies on keyword presence in test file -❌ **Single file focus** - Reports cover only the analyzed test file, not the entire codebase - -These limitations mean the analyzer provides **scenario and platform coverage analysis** for a single E2E test file, not structural code coverage across a codebase. - -#### Step 6: Generate Reports - -The analyzer generates three report formats. You should generate Python code at runtime to create these reports. - -#### 1. HTML Gap Report (`test-gaps-report.html`) - -**Purpose:** Interactive, filterable HTML report for visual gap analysis with professional styling - -**HTML Document Structure:** - -Generate a complete HTML5 document with the following structure: - -```html - - - - - - Test Coverage Gap Analysis - {filename} - - - -
- -
- - - -``` - -**CSS Styles (Inline in ` - - -
-

🧬 Mutation Testing Report

- -
-
-
$MUTATION_SCORE%
-
Mutation Score
-
-
-
$KILLED_MUTANTS
-
Killed (Good)
-
-
-
$SURVIVED_MUTANTS
-
Survived (Bad)
-
-
-
$TOTAL_MUTANTS
-
Total Mutants
-
-
- -

Verdict: $(echo $VERDICT | tr '[:lower:]' '[:upper:]')

- -

Survived Mutants (Need Attention)

-EOF - - if [ -z "$SURVIVED_LIST" ]; then - echo "

✓ Excellent! All mutations were caught by tests.

" >> "$WORK_DIR/mutation-report.html" - else - cat >> "$WORK_DIR/mutation-report.html" << 'EOF' - - - - - - - -EOF - - echo "$SURVIVED_LIST" | while read mutant_id; do - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - TYPE=$(jq -r '.type' "$RESULT_FILE") - FILE=$(jq -r '.file' "$RESULT_FILE") - LINE=$(jq -r '.line' "$RESULT_FILE") - DESC=$(jq -r '.description' "$RESULT_FILE") - - cat >> "$WORK_DIR/mutation-report.html" << EOF - - - - - - -EOF - done - - echo "
ID | Type | Location | Description
$mutant_id | $TYPE | $FILE:$LINE | $DESC
" >> "$WORK_DIR/mutation-report.html" - fi - - cat >> "$WORK_DIR/mutation-report.html" << 'EOF' - -

💡 Performance Note

-

This mutation testing used an in-place mutation strategy - no repository copies were created! This makes mutation testing practical even for large repositories.

-
- - -EOF - - echo "📊 HTML Report: file://$(pwd)/$WORK_DIR/mutation-report.html" - ;; - - markdown) - # Generate Markdown report - cat > "$WORK_DIR/mutation-report.md" << EOF -# Mutation Testing Report - -**Date:** $(date '+%Y-%m-%d %H:%M:%S') - -## Summary - -| Metric | Value | -|--------|-------| -| **Mutation Score** | **${MUTATION_SCORE}%** | -| Total Mutants | $TOTAL_MUTANTS | -| Killed (Good) | $KILLED_MUTANTS | -| Survived (Bad) | $SURVIVED_MUTANTS | -| Verdict | ${VERDICT^^} | - -## Survived Mutants - -EOF - - if [ -z "$SURVIVED_LIST" ]; then - echo "✓ **Excellent!** All mutations were caught by tests." >> "$WORK_DIR/mutation-report.md" - else - cat >> "$WORK_DIR/mutation-report.md" << 'EOF' -| Mutant ID | Type | Location | Description | -|-----------|------|----------|-------------| -EOF - - echo "$SURVIVED_LIST" | while read mutant_id; do - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - TYPE=$(jq -r '.type' "$RESULT_FILE") - FILE=$(jq -r '.file' "$RESULT_FILE") - LINE=$(jq -r '.line' "$RESULT_FILE") - DESC=$(jq -r '.description' "$RESULT_FILE") - - echo "| \`$mutant_id\` | $TYPE | \`$FILE:$LINE\` | $DESC |" >> "$WORK_DIR/mutation-report.md" - done - fi - - echo "" >> "$WORK_DIR/mutation-report.md" - echo "---" >> "$WORK_DIR/mutation-report.md" - echo "*Generated using in-place mutation testing - no repository copies created!*" >> "$WORK_DIR/mutation-report.md" - - echo "📊 Markdown Report: $(pwd)/$WORK_DIR/mutation-report.md" - ;; - - json) - # JSON report from all result files - jq -n -s \ - --arg score "$MUTATION_SCORE" \ - --arg verdict "$VERDICT" \ - --argjson killed "$KILLED_MUTANTS" \ - --argjson survived "$SURVIVED_MUTANTS" \ - --argjson total "$TOTAL_MUTANTS" \ - '{ - summary: { - mutation_score: ($score | tonumber), - verdict: $verdict, - killed: $killed, - survived: $survived, - total: $total - }, - results: . 
- }' "$RESULTS_DIR"/*-result.json > "$WORK_DIR/mutation-report.json" - - echo "📊 JSON Report: $(pwd)/$WORK_DIR/mutation-report.json" - ;; -esac - -echo "" -``` - ---- - -## Return Value - -**Format**: Mutation testing report with the following sections: - -1. **Summary Statistics:** - - Total mutants generated - - Mutants killed (tests caught the bug) - - Mutants survived (tests missed the bug) - - Mutation score percentage - -2. **Detailed Results Per Controller:** - - Mutation breakdown by controller - - Mutation score per controller - - Areas with weak test coverage - -3. **Survived Mutants Analysis:** - - List of all survived mutants - - Code location of each mutation - - Type of mutation applied - - Recommended test cases to add - -4. **Actionable Recommendations:** - - Priority order for adding tests - - Example test cases for survived mutants - - Patterns of weak coverage - -5. **File Locations:** - - HTML/Markdown report path - - JSON analysis data path - - Individual mutation result files in `.work/mutation-testing/results/` - -**Exit Codes:** -- `0`: Mutation testing completed successfully -- `1`: Baseline tests failed (fix tests first) -- `2`: No controllers found -- `3`: Mutation generation failed - -## Examples - -### Example 1: Basic Mutation Testing - -Test all controllers with default settings: - -``` -/testing:mutation-test -``` - -Output: -``` -Discovering controllers... -Found 3 controllers: PodController, ServiceController, DeploymentController - -Generating mutations... -✓ Generated 145 mutants - -Running baseline tests... -✓ Baseline tests passed - -Testing mutants... 
[145/145] - ✓ Killed: 124 - ⚠️ Survived: 21 - -Mutation Score: 85.5% - -📊 Report: .work/mutation-testing/mutation-report.html -``` - ---- - -### Example 2: Test Specific Controllers - -Test only specific controllers: - -``` -/testing:mutation-test --controllers PodController,ServiceController -``` - ---- - -### Example 3: Focus on Error Handling - -Test only error handling mutations: - -``` -/testing:mutation-test --mutation-types error-handling,returns -``` - -Use case: After adding error handling code, validate it's properly tested. - ---- - -### Example 4: Generate Markdown Report - -Generate markdown report for PR comments: - -``` -/testing:mutation-test --report-format markdown -``` - -Can paste the markdown report into PR comments to show test quality improvements. - ---- - -### Example 5: Operator with Custom Path - -Test operator in different directory: - -``` -/testing:mutation-test ~/git/my-operator -``` - ---- - -## Notes - -### Mutation Score Interpretation - -- **90%+**: Excellent - Strong test suite with comprehensive edge case coverage -- **80-90%**: Good - Solid test coverage, minor gaps in edge cases -- **70-80%**: Fair - Adequate coverage but notable gaps in error handling -- **< 70%**: Poor - Significant gaps in test coverage, high risk of bugs - -### Performance Considerations - -- Mutation testing is computationally expensive (runs tests N times for N mutants) -- Typical operator with 2000 LOC controller code generates 100-200 mutants -- With 30-second test suite, expect 50-100 minutes for full mutation testing -- Consider running on CI with parallelization, or locally with filtered mutation types - -### Best Practices - -1. **Start Small**: Test one controller at a time initially -2. **Fix Baseline First**: Ensure all tests pass before mutation testing -3. **Iterate**: Focus on high-value mutations (error handling, conditionals) -4. **Integrate into CI**: Run mutation testing weekly or per release -5. 
**Track Progress**: Measure mutation score improvement over time - -### Limitations - -- Cannot detect equivalent mutants (mutations that don't change behavior) -- Test suite runtime directly impacts mutation testing time -- May generate false positives if tests are non-deterministic -- Requires Go toolchain and controller-runtime setup - -### Scope and Exclusions - -**Included Files:** -- All files matching `*controller.go` pattern anywhere in the codebase (e.g., `operator_controller.go`, `pod_controller.go`) -- Controller files under `controller/` or `controllers/` directories (singular or plural) -- Related reconciliation logic and helper functions - -**Excluded Files:** -- **Vendor directories**: The `vendor/` folder is automatically excluded from mutation testing as it contains third-party dependencies that should not be mutated -- Generated code and third-party packages - -### Common Survived Mutants - -**Patterns that often indicate missing tests:** -- Error handling not tested (`if err != nil` removed) -- Requeue logic not validated (changed requeue timing) -- Status updates not verified (skipped status updates) -- Finalizer logic not tested (removed finalizer checks) -- Condition transitions not validated (changed condition states) - -## See Also - -- Go mutation testing tools: `go-mutesting`, `gremlins` -- Operator testing patterns: controller-runtime testing docs -- `/openshift:new-e2e-test` - Generate E2E tests for operators -- `/utils:generate-test-plan` - Create test plans for PRs - -## References - -- [Mutation Testing: A Comprehensive Survey](https://ieeexplore.ieee.org/document/5487526) -- [controller-runtime Testing Guide](https://book.kubebuilder.io/cronjob-tutorial/writing-tests.html) -- [Google Testing Blog: Mutation Testing](https://testing.googleblog.com/2021/04/mutation-testing.html) - diff --git a/plugins/testing/skills/mutation-generator/SKILL.md b/plugins/testing/skills/mutation-generator/SKILL.md deleted file mode 100644 index 
7da73fd9b..000000000 --- a/plugins/testing/skills/mutation-generator/SKILL.md +++ /dev/null @@ -1,648 +0,0 @@ ---- -name: Mutation Generator for Operator Controllers -description: Generate code mutations for Kubernetes operator controllers to enable mutation testing. Applies operator-specific mutations to reconciliation logic, error handling, and API interactions. ---- - -# Mutation Generator for Operator Controllers - -This skill generates mutations (deliberate bugs) in operator controller code to enable mutation testing. It focuses on patterns common in Kubernetes operators built with controller-runtime. - -## When to Use This Skill - -Use this skill when: -- You need to generate mutants for operator mutation testing -- You want to create realistic bugs that tests should catch -- You're analyzing operator controller code for mutation points -- You need operator-specific mutations (requeue, status updates, API calls) - -## Prerequisites - -1. **Go operator code** using controller-runtime framework -2. **Python 3.8+** for mutation generation scripts -3. **Go AST parsing** capability (via Go's own `go/parser` and `go/ast` packages, e.g. a small Go helper invoked from Python through a subprocess; Python's built-in `ast` module parses only Python source and cannot parse Go) - -## Mutation Strategy for Operators - -### Understanding Operator Mutation Points - -Kubernetes operators have specific patterns that should be tested: - -**1. Reconciliation Logic:** -- Conditional checks on resource state -- Decision trees for state transitions -- Owner reference checks -- Finalizer logic - -**2. Error Handling:** -- API call error handling -- Client-go errors (NotFound, AlreadyExists, Conflict) -- Wrapped errors -- Error return paths - -**3. Requeue Behavior:** -- `ctrl.Result{Requeue: true}` -- `ctrl.Result{RequeueAfter: duration}` -- Rate limiting -- Conditional requeueing - -**4. Status Updates:** -- Condition setting (Ready, Available, Degraded) -- ObservedGeneration tracking -- Status subresource updates -- Partial status updates - -**5. API Interactions:** -- Get vs. List operations -- Create vs. Update logic -- Patch vs. 
Update -- Delete with preconditions - -## Implementation Steps - -### Step 1: Parse Controller Code - -**1.1 Identify Reconcile Functions** - -Look for the standard controller-runtime reconciliation signature: - -```go -func (r *MyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) -``` - -**1.2 Extract Abstract Syntax Tree (AST)** - -Use Go's parser to build AST: - -```python -import go_parser # or use subprocess to call go/ast tools - -def parse_controller(file_path): - """Parse Go controller file into AST.""" - # Read file - with open(file_path, 'r') as f: - source = f.read() - - # Parse into AST - ast = parse_go_file(source) - - # Find Reconcile function - reconcile_func = find_reconcile_function(ast) - - return { - 'source': source, - 'ast': ast, - 'reconcile': reconcile_func, - 'file_path': file_path - } -``` - -**1.3 Identify Mutation Candidates** - -Walk the AST to find mutation points: - -```python -def find_mutation_points(ast_node): - """Find all locations where mutations can be applied.""" - mutation_points = [] - - # Walk AST nodes - for node in walk_ast(ast_node): - if is_conditional(node): - mutation_points.append({ - 'type': 'conditional', - 'location': node.position, - 'original': node.text, - 'node': node - }) - elif is_error_check(node): - mutation_points.append({ - 'type': 'error-handling', - 'location': node.position, - 'original': node.text, - 'node': node - }) - # ... 
more patterns - - return mutation_points -``` - ---- - -### Step 2: Generate Conditional Mutations - -**2.1 Comparison Operator Mutations** - -Mutate comparison operators: - -```python -CONDITIONAL_MUTATIONS = { - '==': ['!='], - '!=': ['=='], - '<': ['>', '<=', '>='], - '>': ['<', '<=', '>='], - '<=': ['<', '>='], - '>=': ['>', '<='], -} - -def mutate_conditional(node): - """Generate mutations for conditional expressions.""" - mutations = [] - - if node.operator in CONDITIONAL_MUTATIONS: - for new_op in CONDITIONAL_MUTATIONS[node.operator]: - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'conditional', - 'description': f'Change {node.operator} to {new_op}', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace(node.operator, new_op, 1), - 'pattern': 'comparison-operator' - }) - - return mutations -``` - -**2.2 Boolean Expression Mutations** - -Negate boolean expressions: - -```python -def mutate_boolean_expr(node): - """Negate boolean expressions.""" - mutations = [] - - # if condition → if !condition - # if !condition → if condition - - if node.is_negated: - # Remove negation - mutated = node.text.lstrip('!') - description = 'Remove negation' - else: - # Add negation - mutated = f'!({node.text})' - description = 'Add negation' - - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'conditional', - 'description': description, - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': mutated, - 'pattern': 'boolean-negation' - }) - - return mutations -``` - -**2.3 Operator-Specific Conditional Mutations** - -Target operator patterns: - -```python -# Example: Mutate finalizer checks -# Original: if contains(obj.Finalizers, MyFinalizer) -# Mutated: if !contains(obj.Finalizers, MyFinalizer) - -# Example: Mutate generation checks -# Original: if obj.Generation != obj.Status.ObservedGeneration -# Mutated: if obj.Generation == obj.Status.ObservedGeneration -``` - ---- - -### Step 3: 
Generate Error Handling Mutations - -**3.1 Remove Error Checks** - -Most critical mutation type for operators: - -```python -def mutate_error_handling(node): - """Mutate error handling code.""" - mutations = [] - - # Pattern: if err != nil { return ... } - if is_error_check_pattern(node): - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'error-handling', - 'description': 'Remove error check', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': f'// MUTANT: Removed error check\n// {node.text}', - 'pattern': 'remove-error-check' - }) - - return mutations -``` - -**3.2 Change Error Returns** - -```python -def mutate_error_return(node): - """Mutate error return statements.""" - mutations = [] - - # Pattern: return ctrl.Result{}, err - # Mutate to: return ctrl.Result{}, nil - - if is_error_return(node): - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'error-handling', - 'description': 'Return nil instead of error', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace(', err', ', nil'), - 'pattern': 'error-return-nil' - }) - - return mutations -``` - -**3.3 Ignore Specific Error Types** - -Kubernetes-specific error mutations: - -```python -def mutate_k8s_errors(node): - """Mutate Kubernetes error handling.""" - mutations = [] - - # Pattern: if errors.IsNotFound(err) - # Mutate to: if errors.IsAlreadyExists(err) - - # Pattern: if errors.IsAlreadyExists(err) - # Mutate to: if errors.IsNotFound(err) - - if is_k8s_error_check(node): - error_types = ['IsNotFound', 'IsAlreadyExists', 'IsConflict', 'IsInvalid'] - current_type = extract_error_type(node) - - for new_type in error_types: - if new_type != current_type: - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'error-handling', - 'description': f'Change {current_type} to {new_type}', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace(current_type, new_type), - 'pattern': 
'k8s-error-type' - }) - - return mutations -``` - ---- - -### Step 4: Generate Requeue Mutations - -**4.1 Toggle Requeue Flag** - -```python -def mutate_requeue(node): - """Mutate requeue behavior.""" - mutations = [] - - # Pattern: return ctrl.Result{}, nil - # Mutate to: return ctrl.Result{Requeue: true}, nil - - if is_result_return(node): - # If no requeue, add requeue - if 'Requeue' not in node.text: - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'requeue', - 'description': 'Add unnecessary requeue', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace('ctrl.Result{}', 'ctrl.Result{Requeue: true}'), - 'pattern': 'add-requeue' - }) - - # If requeue exists, remove it - if 'Requeue: true' in node.text: - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'requeue', - 'description': 'Remove requeue flag', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace('Requeue: true', 'Requeue: false'), - 'pattern': 'remove-requeue' - }) - - return mutations -``` - -**4.2 Change Requeue Timing** - -```python -def mutate_requeue_after(node): - """Mutate RequeueAfter duration.""" - mutations = [] - - # Pattern: RequeueAfter: 5 * time.Second - # Mutate to: RequeueAfter: 0 - # Or: RequeueAfter: 5 * time.Minute (change unit) - - if 'RequeueAfter' in node.text: - mutations.extend([ - { - 'id': generate_mutant_id(), - 'type': 'requeue', - 'description': 'Set RequeueAfter to zero', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': set_requeue_after_zero(node.text), - 'pattern': 'requeue-timing-zero' - }, - { - 'id': generate_mutant_id(), - 'type': 'requeue', - 'description': 'Change RequeueAfter time unit', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': change_time_unit(node.text), - 'pattern': 'requeue-timing-unit' - } - ]) - - return mutations -``` - ---- - -### Step 5: Generate Status Update Mutations - 
-**5.1 Skip Status Updates** - -```python -def mutate_status_update(node): - """Mutate status update calls.""" - mutations = [] - - # Pattern: r.Status().Update(ctx, obj) - # Mutate to: // MUTANT: Skipped status update - - if is_status_update_call(node): - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'status', - 'description': 'Skip status update', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': f'// MUTANT: Skipped status update\n// {node.text}', - 'pattern': 'skip-status-update' - }) - - return mutations -``` - -**5.2 Change Condition Values** - -```python -def mutate_condition(node): - """Mutate condition setting.""" - mutations = [] - - # Pattern: SetCondition(Ready, True, "Ready", "...") - # Mutate to: SetCondition(Ready, False, "Ready", "...") - - if is_set_condition_call(node): - # Toggle condition status - if 'True' in node.text: - mutated = node.text.replace('True', 'False', 1) - desc = 'Change condition to False' - else: - mutated = node.text.replace('False', 'True', 1) - desc = 'Change condition to True' - - mutations.append({ - 'id': generate_mutant_id(), - 'type': 'status', - 'description': desc, - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': mutated, - 'pattern': 'condition-value' - }) - - return mutations -``` - ---- - -### Step 6: Generate API Call Mutations - -**6.1 Change API Operation Type** - -```python -def mutate_api_call(node): - """Mutate Kubernetes API calls.""" - mutations = [] - - # Pattern: r.Get(ctx, key, obj) - # Mutate to: r.List(ctx, obj) [intentional API misuse] - - if is_client_call(node): - operation = extract_operation(node) # Get, List, Create, Update, Delete - - alternative_ops = { - 'Get': ['List'], # Get → List (wrong cardinality) - 'Update': ['Patch'], # Update → Patch (different semantics) - 'Create': ['Update'], # Create → Update (wrong operation) - } - - if operation in alternative_ops: - for alt_op in alternative_ops[operation]: - 
mutations.append({ - 'id': generate_mutant_id(), - 'type': 'api-calls', - 'description': f'Change {operation} to {alt_op}', - 'location': f'{node.file}:{node.line}', - 'original': node.text, - 'mutated': node.text.replace(operation, alt_op, 1), - 'pattern': 'api-operation-type' - }) - - return mutations -``` - ---- - -### Step 7: Apply Mutations In-Place (No Copies!) - -**7.1 Generate Mutation Metadata Only** - -For each mutation, generate metadata WITHOUT copying files: - -```python -def generate_mutation_metadata(mutation, operator_path, output_file): - """Generate mutation metadata for in-place application.""" - - mutant_id = mutation['id'] - - # Save mutation metadata only (no file copies) - metadata = { - 'id': mutant_id, - 'type': mutation['type'], - 'description': mutation['description'], - 'file': mutation['file'], - 'line': mutation['line'], - 'original': mutation['original'], - 'mutated': mutation['mutated'], - 'pattern': mutation['pattern'] - } - - return metadata - -def save_all_mutations(mutations, output_file): - """Save all mutation definitions to a single JSON file.""" - - with open(output_file, 'w') as f: - json.dump({ - 'total_mutations': len(mutations), - 'mutations': mutations - }, f, indent=2) -``` - -**Note**: The mutation testing workflow will apply each mutation in-place to the original file, run tests, then immediately revert the change. This avoids creating GB of repository copies. 
- -**7.2 Apply and Revert Mutations In-Place** - -```python -def apply_mutation_to_file(file_path, mutation): - """Apply mutation in-place to the original file.""" - - with open(file_path, 'r') as f: - lines = f.readlines() - - # Find and replace the specific line - target_line = mutation['line'] - 1 # 0-indexed - - if target_line < len(lines): - # Replace exact match on that line - lines[target_line] = lines[target_line].replace( - mutation['original'], - mutation['mutated'], - 1 # Replace only first occurrence - ) - - # Write back to the ORIGINAL file - with open(file_path, 'w') as f: - f.writelines(lines) - -def revert_mutation(file_path, mutation): - """Revert mutation by restoring the original code.""" - - with open(file_path, 'r') as f: - lines = f.readlines() - - target_line = mutation['line'] - 1 - - if target_line < len(lines): - # Restore original code - lines[target_line] = lines[target_line].replace( - mutation['mutated'], - mutation['original'], - 1 - ) - - with open(file_path, 'w') as f: - f.writelines(lines) -``` - -**Usage Pattern**: Apply mutation → Run tests → Immediately revert. No file copies needed! - ---- - -## Output Format - -The mutation generator produces: - -**1. Mutations Catalog (JSON)** - -```json -{ - "total_mutations": 145, - "mutations_by_type": { - "conditional": 42, - "error-handling": 38, - "requeue": 18, - "status": 25, - "api-calls": 15, - "returns": 7 - }, - "mutations": [ - { - "id": "mutant-001", - "type": "error-handling", - "description": "Remove error check after API Get", - "file": "controllers/pod_controller.go", - "line": 87, - "function": "Reconcile", - "pattern": "remove-error-check", - "original": "if err != nil { return ctrl.Result{}, err }", - "mutated": "// MUTANT: Removed error check" - } - ] -} -``` - -**2. 
Mutation Metadata Storage** - -``` -.work/mutation-testing/ -├── mutations.json # All mutation definitions -├── results/ -│ ├── mutant-001-result.json # Metadata about this mutation -│ ├── mutant-001-output.txt # Test output -│ ├── mutant-002-result.json -│ └── mutant-002-output.txt -... -``` - -**Note**: Mutations are applied **in-place** to the original files, tested, then immediately reverted. No full repository copies are created, keeping disk usage minimal (<1MB). - -## Error Handling - -- **No controllers found**: Warn user and provide suggestions -- **Parse errors**: Skip files with syntax errors, report them -- **Invalid mutations**: Validate mutations don't break Go syntax -- **Disk space**: Mutations are applied in-place with minimal disk usage (<1MB for metadata); verify sufficient temporary storage and available inodes if needed - -## Best Practices - -1. **Focus on High-Value Mutations**: Prioritize error handling and conditionals -2. **Avoid Equivalent Mutants**: Don't generate mutations that don't change behavior -3. **Limit Mutations**: For large controllers, consider sampling strategy -4. **Validate Syntax**: Ensure mutated code is syntactically valid before including in mutation list -5. **Track Coverage**: Keep mapping of which mutations test which behavior -6. **In-Place Efficiency**: Generate metadata only; apply/revert mutations during testing to minimize disk usage - -## Example Usage - -```python -# Generate mutation metadata (no file copies!) 
-mutations = generate_mutations( - operator_path="/path/to/operator", - mutation_types=["error-handling", "conditional", "requeue"], - output_file=".work/mutation-testing/mutations.json" -) - -print(f"Generated {len(mutations)} mutation definitions") -print(f"Metadata saved to: {output_file} (Total: <1MB)") -print(f"No repository copies created - mutations applied in-place during testing") -``` - -## See Also - -- [mutation-tester skill](../mutation-tester/SKILL.md) - Tests each mutant -- [go-mutesting](https://github.com/zimmski/go-mutesting) - Existing Go mutation testing tool -- [Mutation Testing Best Practices](https://pedrorijo.com/blog/intro-mutation/) - diff --git a/plugins/testing/skills/mutation-generator/apply_mutation.py b/plugins/testing/skills/mutation-generator/apply_mutation.py deleted file mode 100755 index 88d4b1dcd..000000000 --- a/plugins/testing/skills/mutation-generator/apply_mutation.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 -""" -Apply or revert a single mutation in-place. -No file copying - just modify the original file temporarily. 
-""" - -import argparse -import json -import sys -from pathlib import Path - - -def apply_mutation(mutation: dict, operator_path: Path) -> bool: - """Apply a mutation to the file.""" - file_path = operator_path / mutation['file'] - line_num = mutation['line'] - - try: - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - if 1 <= line_num <= len(lines): - # Replace the specific line - lines[line_num - 1] = mutation['mutated'] + '\n' - - with open(file_path, 'w', encoding='utf-8') as f: - f.writelines(lines) - - return True - else: - print(f"Error: Line {line_num} out of range in {file_path}") - return False - - except Exception as e: - print(f"Error applying mutation: {e}") - return False - - -def revert_mutation(mutation: dict, operator_path: Path) -> bool: - """Revert a mutation (restore original line).""" - file_path = operator_path / mutation['file'] - line_num = mutation['line'] - - try: - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - if 1 <= line_num <= len(lines): - # Restore original line - lines[line_num - 1] = mutation['original'] + '\n' - - with open(file_path, 'w', encoding='utf-8') as f: - f.writelines(lines) - - return True - else: - print(f"Error: Line {line_num} out of range in {file_path}") - return False - - except Exception as e: - print(f"Error reverting mutation: {e}") - return False - - -def main(): - parser = argparse.ArgumentParser(description='Apply or revert a mutation') - parser.add_argument('--mutation-json', required=True, help='Path to mutation JSON file') - parser.add_argument('--operator-path', required=True, help='Path to operator') - parser.add_argument('--action', choices=['apply', 'revert'], required=True, help='Action to perform') - - args = parser.parse_args() - - # Load mutation - with open(args.mutation_json, 'r') as f: - mutation = json.load(f) - - operator_path = Path(args.operator_path).resolve() - - if args.action == 'apply': - success = apply_mutation(mutation, 
operator_path) - else: - success = revert_mutation(mutation, operator_path) - - return 0 if success else 1 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/plugins/testing/skills/mutation-generator/generate_mutations.py b/plugins/testing/skills/mutation-generator/generate_mutations.py deleted file mode 100755 index 1349526e4..000000000 --- a/plugins/testing/skills/mutation-generator/generate_mutations.py +++ /dev/null @@ -1,462 +0,0 @@ -#!/usr/bin/env python3 -""" -Mutation Generator for Kubernetes Operators - -Generates code mutations for operator controllers to enable mutation testing. -Focuses on patterns common in controller-runtime based operators. -""" - -import argparse -import json -import os -import re -import shutil -import sys -from pathlib import Path -from typing import List, Dict, Any - - -class MutationGenerator: - """Generates mutations for operator controller code.""" - - # Mutation type configurations - CONDITIONAL_OPS = { - '==': ['!='], - '!=': ['=='], - '<': ['>', '<='], - '>': ['<', '>='], - '<=': ['<', '>'], - '>=': ['>', '<'], - '&&': ['||'], - '||': ['&&'], - } - - K8S_ERROR_TYPES = [ - 'IsNotFound', - 'IsAlreadyExists', - 'IsConflict', - 'IsInvalid', - 'IsTimeout', - 'IsServiceUnavailable', - ] - - def __init__(self, operator_path: str, output_dir: str, mutation_types: List[str]): - self.operator_path = Path(operator_path) - self.output_dir = Path(output_dir) - self.mutation_types = mutation_types - self.mutant_counter = 0 - self.mutations = [] - - def generate(self) -> Dict[str, Any]: - """Main entry point to generate all mutations.""" - print(f"🔍 Scanning operator at: {self.operator_path}") - - # Find controller files - controller_files = self._find_controller_files() - print(f"📄 Found {len(controller_files)} controller files") - - # Generate mutations for each file - for controller_file in controller_files: - print(f" Analyzing: {controller_file.relative_to(self.operator_path)}") - 
self._generate_mutations_for_file(controller_file) - - # Create mutant directories - print(f"\n🧬 Creating {len(self.mutations)} mutants...") - self._create_mutants() - - # Generate summary - summary = self._generate_summary() - - print(f"✓ Generated {len(self.mutations)} mutations") - return summary - - def _find_controller_files(self) -> List[Path]: - """Find all controller files in the operator.""" - controller_files = [] - - # Search patterns - patterns = [ - '**/controllers/*controller*.go', - '**/controllers/*reconciler*.go', - '**/pkg/controller/**/*controller*.go', - '**/pkg/controller/**/*reconciler*.go', - ] - - for pattern in patterns: - for file_path in self.operator_path.glob(pattern): - # Skip test files and vendor directories - if '_test.go' not in str(file_path) and 'vendor' not in file_path.parts: - controller_files.append(file_path) - - return list(set(controller_files)) # Remove duplicates - - def _generate_mutations_for_file(self, file_path: Path): - """Generate mutations for a single controller file.""" - try: - with open(file_path, 'r') as f: - content = f.read() - lines = content.split('\n') - except (OSError, IOError) as e: - print(f"Error: Unable to read file {file_path}: {e}") - return # Skip this file and continue with others - - # Apply each mutation type - if 'conditionals' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_conditionals(file_path, lines) - - if 'error-handling' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_error_handling(file_path, lines) - - if 'returns' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_returns(file_path, lines) - - if 'requeue' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_requeue(file_path, lines) - - if 'status' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_status(file_path, lines) - - if 'api-calls' in self.mutation_types or 'all' in self.mutation_types: - 
self._mutate_api_calls(file_path, lines) - - def _mutate_conditionals(self, file_path: Path, lines: List[str]): - """Generate conditional operator mutations.""" - for line_num, line in enumerate(lines, 1): - # Skip comments and strings - if line.strip().startswith('//') or line.strip().startswith('/*'): - continue - - # Find conditional operators - for old_op, new_ops in self.CONDITIONAL_OPS.items(): - if old_op in line and ('if ' in line or 'for ' in line or 'return ' in line): - for new_op in new_ops: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(old_op, new_op, 1), - mutation_type='conditional', - description=f'Change {old_op} to {new_op}', - pattern='comparison-operator' - ) - - def _mutate_error_handling(self, file_path: Path, lines: List[str]): - """Generate error handling mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - # Pattern 1: if err != nil { return ... } - if 'if err != nil' in stripped and 'return' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(line.strip(), f'// MUTANT: Removed error check - {line.strip()}'), - mutation_type='error-handling', - description='Remove error check', - pattern='remove-error-check' - ) - - # Pattern 2: Kubernetes error type checks - for error_type in self.K8S_ERROR_TYPES: - if error_type in stripped: - for alt_type in self.K8S_ERROR_TYPES: - if alt_type != error_type: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(error_type, alt_type, 1), - mutation_type='error-handling', - description=f'Change {error_type} to {alt_type}', - pattern='k8s-error-type' - ) - break # Only one alternative per line - - def _mutate_returns(self, file_path: Path, lines: List[str]): - """Generate return statement mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - # Pattern: return 
ctrl.Result{}, err - if 'return' in stripped and 'ctrl.Result' in stripped and ', err' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(', err', ', nil'), - mutation_type='returns', - description='Return nil instead of error', - pattern='error-return-nil' - ) - - # Pattern: return nil (in error return position) - if stripped == 'return nil' and line_num > 1: - # Check if previous line is error check - prev_line = lines[line_num - 2].strip() if line_num > 1 else '' - if 'err' in prev_line: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace('return nil', 'return err'), - mutation_type='returns', - description='Return error instead of nil', - pattern='error-return-err' - ) - - def _mutate_requeue(self, file_path: Path, lines: List[str]): - """Generate requeue behavior mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - # Pattern 1: return ctrl.Result{}, nil (add requeue) - if 'return ctrl.Result{}' in stripped and 'Requeue' not in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace('ctrl.Result{}', 'ctrl.Result{Requeue: true}'), - mutation_type='requeue', - description='Add unnecessary requeue', - pattern='add-requeue' - ) - - # Pattern 2: Requeue: true (remove requeue) - if 'Requeue: true' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace('Requeue: true', 'Requeue: false'), - mutation_type='requeue', - description='Remove requeue flag', - pattern='remove-requeue' - ) - - # Pattern 3: RequeueAfter duration - if 'RequeueAfter:' in stripped: - # Set to zero - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=re.sub(r'RequeueAfter:\s*[^,}]+', 'RequeueAfter: 0', line), - mutation_type='requeue', - description='Set RequeueAfter to zero', - 
pattern='requeue-timing-zero' - ) - - def _mutate_status(self, file_path: Path, lines: List[str]): - """Generate status update mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - # Pattern 1: Status().Update() calls - if '.Status().Update(' in stripped or '.Status().Patch(' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(line.strip(), f'// MUTANT: Skipped status update - {line.strip()}'), - mutation_type='status', - description='Skip status update', - pattern='skip-status-update' - ) - - # Pattern 2: Condition status (True/False) - if 'SetCondition' in stripped or 'Condition' in stripped: - if 'corev1.ConditionTrue' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace('corev1.ConditionTrue', 'corev1.ConditionFalse'), - mutation_type='status', - description='Change condition to False', - pattern='condition-value' - ) - elif 'corev1.ConditionFalse' in stripped: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace('corev1.ConditionFalse', 'corev1.ConditionTrue'), - mutation_type='status', - description='Change condition to True', - pattern='condition-value' - ) - - def _mutate_api_calls(self, file_path: Path, lines: List[str]): - """Generate API call mutations with signature-compatible replacements.""" - # Only include mutations with compatible signatures and option types - # Removed Get→List and Update→Patch due to signature incompatibility - api_mutations = [ - ('r.Create(', 'r.Update(', 'Change Create to Update'), - ('r.Update(', 'r.Create(', 'Change Update to Create'), - ] - - for line_num, line in enumerate(lines, 1): - for old_call, new_call, description in api_mutations: - if old_call in line: - self._add_mutation( - file_path=file_path, - line_num=line_num, - original=line, - mutated=line.replace(old_call, new_call, 1), - 
mutation_type='api-calls', - description=description, - pattern='api-operation-type' - ) - - def _add_mutation(self, file_path: Path, line_num: int, original: str, - mutated: str, mutation_type: str, description: str, pattern: str): - """Add a mutation to the list.""" - self.mutant_counter += 1 - - mutation = { - 'id': f'mutant-{self.mutant_counter:03d}', - 'type': mutation_type, - 'description': description, - 'file': str(file_path.relative_to(self.operator_path)), - 'line': line_num, - 'pattern': pattern, - 'original': original.strip(), - 'mutated': mutated.strip() - } - - self.mutations.append(mutation) - - def _create_mutants(self): - """Create mutant copies of the operator.""" - os.makedirs(self.output_dir, exist_ok=True) - - for i, mutation in enumerate(self.mutations, 1): - mutant_dir = self.output_dir / mutation['id'] - - # Copy operator - if mutant_dir.exists(): - shutil.rmtree(mutant_dir) - shutil.copytree(self.operator_path, mutant_dir) - - # Apply mutation - mutated_file = mutant_dir / mutation['file'] - self._apply_mutation_to_file(mutated_file, mutation) - - # Save metadata - metadata_file = mutant_dir / 'MUTATION.json' - with open(metadata_file, 'w') as f: - json.dump(mutation, f, indent=2) - - # Progress indicator - if i % 10 == 0: - print(f" Created {i}/{len(self.mutations)} mutants...") - - def _apply_mutation_to_file(self, file_path: Path, mutation: Dict[str, Any]): - """Apply mutation to a specific file.""" - try: - with open(file_path, 'r') as f: - lines = f.readlines() - except (OSError, IOError) as e: - print(f"Error: Unable to read file {file_path}: {e}") - return # Skip this file and continue - - target_line = mutation['line'] - 1 - - if target_line < len(lines): - # Replace the line - mutated_content = mutation['mutated'] - - # Preserve indentation - indent = len(lines[target_line]) - len(lines[target_line].lstrip()) - lines[target_line] = ' ' * indent + mutated_content + '\n' - else: - # Out of bounds - log error and skip writing - 
print(f"Error: Line {mutation['line']} out of bounds in {file_path} " - f"(file has {len(lines)} lines)") - return # Skip writing unmodified file - - # Only write if mutation was actually applied - try: - with open(file_path, 'w') as f: - f.writelines(lines) - except (OSError, IOError) as e: - print(f"Error: Unable to write file {file_path}: {e}") - return # Skip this file and continue - - def _generate_summary(self) -> Dict[str, Any]: - """Generate summary of mutations.""" - mutations_by_type = {} - for mutation in self.mutations: - mut_type = mutation['type'] - mutations_by_type[mut_type] = mutations_by_type.get(mut_type, 0) + 1 - - summary = { - 'total_mutations': len(self.mutations), - 'mutations_by_type': mutations_by_type, - 'mutations': self.mutations, - 'output_dir': str(self.output_dir) - } - - # Save to JSON - summary_file = self.output_dir / 'mutations-summary.json' - with open(summary_file, 'w') as f: - json.dump(summary, f, indent=2) - - return summary - - -def main(): - parser = argparse.ArgumentParser( - description='Generate mutations for Kubernetes operator controllers' - ) - parser.add_argument( - '--operator-path', - default='.', - help='Path to operator repository (default: current directory)' - ) - parser.add_argument( - '--mutation-types', - default='all', - help='Comma-separated mutation types: conditionals,error-handling,returns,requeue,status,api-calls,all (default: all)' - ) - parser.add_argument( - '--output-dir', - default='.work/mutation-testing/mutants', - help='Output directory for mutants (default: .work/mutation-testing/mutants)' - ) - - args = parser.parse_args() - - # Parse mutation types - if args.mutation_types == 'all': - mutation_types = ['all'] - else: - mutation_types = [t.strip() for t in args.mutation_types.split(',')] - - # Generate mutations - generator = MutationGenerator( - operator_path=args.operator_path, - output_dir=args.output_dir, - mutation_types=mutation_types - ) - - summary = generator.generate() - - # Print 
summary - print("\n" + "="*60) - print("Mutation Generation Summary") - print("="*60) - print(f"Total Mutations: {summary['total_mutations']}") - print("\nBy Type:") - for mut_type, count in summary['mutations_by_type'].items(): - print(f" {mut_type:20s}: {count:3d}") - print(f"\nMutants created in: {summary['output_dir']}") - print("="*60) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/plugins/testing/skills/mutation-generator/generate_mutations_efficient.py b/plugins/testing/skills/mutation-generator/generate_mutations_efficient.py deleted file mode 100755 index e3883ba35..000000000 --- a/plugins/testing/skills/mutation-generator/generate_mutations_efficient.py +++ /dev/null @@ -1,386 +0,0 @@ -#!/usr/bin/env python3 -""" -Efficient Mutation Generator for Kubernetes Operators - -Generates mutation metadata WITHOUT copying the repository. -Mutations are applied in-place during testing, then reverted. -""" - -import argparse -import json -import os -import re -import sys -from pathlib import Path -from typing import List, Dict, Any - - -class EfficientMutationGenerator: - """Generates mutation metadata without creating copies.""" - - CONDITIONAL_OPS = { - '==': ['!='], - '!=': ['=='], - '<': ['>'], - '>': ['<'], - '<=': ['>'], - '>=': ['<'], - '&&': ['||'], - '||': ['&&'], - } - - K8S_ERROR_TYPES = [ - 'IsNotFound', - 'IsAlreadyExists', - 'IsConflict', - 'IsInvalid', - ] - - def __init__(self, operator_path: str, mutation_types: List[str]): - self.operator_path = Path(operator_path).resolve() - self.mutation_types = mutation_types - self.mutant_counter = 0 - self.mutations = [] - - def generate(self) -> Dict[str, Any]: - """Generate mutation metadata only (no file copies).""" - print(f"🔍 Scanning operator at: {self.operator_path}") - - controller_files = self._find_controller_files() - print(f"📄 Found {len(controller_files)} controller files") - - for controller_file in controller_files: - rel_path = 
controller_file.relative_to(self.operator_path) - print(f" Analyzing: {rel_path}") - self._generate_mutations_for_file(controller_file) - - summary = self._generate_summary() - print(f"✓ Generated {len(self.mutations)} mutation definitions") - - return summary - - def _find_controller_files(self) -> List[Path]: - """Find all controller files.""" - controller_files = [] - patterns = [ - '**/controllers/*controller*.go', - '**/controllers/*reconciler*.go', - '**/pkg/controller/**/*controller*.go', - '**/pkg/controller/**/*reconciler*.go', - ] - - for pattern in patterns: - for file_path in self.operator_path.glob(pattern): - # Skip test files and vendor directories - if '_test.go' not in str(file_path) and 'vendor' not in file_path.parts and file_path.is_file(): - controller_files.append(file_path) - - return list(set(controller_files)) - - def _generate_mutations_for_file(self, file_path: Path): - """Generate mutations for a file.""" - try: - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - except Exception as e: - print(f" ⚠️ Skipping {file_path.name}: {e}") - return - - # Apply mutation types - if 'conditionals' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_conditionals(file_path, lines) - - if 'error-handling' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_error_handling(file_path, lines) - - if 'returns' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_returns(file_path, lines) - - if 'requeue' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_requeue(file_path, lines) - - if 'status' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_status(file_path, lines) - - if 'api-calls' in self.mutation_types or 'all' in self.mutation_types: - self._mutate_api_calls(file_path, lines) - - def _mutate_conditionals(self, file_path: Path, lines: List[str]): - """Generate conditional mutations.""" - for line_num, line in enumerate(lines, 
1): - if line.strip().startswith('//') or line.strip().startswith('/*'): - continue - - for old_op, new_ops in self.CONDITIONAL_OPS.items(): - # Simple substring check for reliable operator detection - if old_op in line and ('if ' in line or 'for ' in line): - for new_op in new_ops: - mutated = line.replace(old_op, new_op, 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='conditional', - description=f'Change {old_op} to {new_op}', - pattern='comparison-operator' - ) - break # Only one mutation per line - - def _mutate_error_handling(self, file_path: Path, lines: List[str]): - """Generate error handling mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - # Pattern 1: if err != nil { return ... } - if 'if err != nil' in stripped: - # Comment out the error check - indent = len(line) - len(line.lstrip()) - mutated = ' ' * indent + '// MUTANT: Removed error check - ' + stripped + '\n' - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='error-handling', - description='Remove error check', - pattern='remove-error-check' - ) - - # Pattern 2: K8s error types - for error_type in self.K8S_ERROR_TYPES: - if error_type in stripped: - for alt_type in self.K8S_ERROR_TYPES: - if alt_type != error_type: - mutated = line.replace(error_type, alt_type, 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='error-handling', - description=f'Change {error_type} to {alt_type}', - pattern='k8s-error-type' - ) - break - break - - def _mutate_returns(self, file_path: Path, lines: List[str]): - """Generate return mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - if 'return' in stripped and 'ctrl.Result' in stripped and ', err' in stripped: - mutated = line.replace(', err', ', nil', 1) - 
self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='returns', - description='Return nil instead of error', - pattern='error-return-nil' - ) - - def _mutate_requeue(self, file_path: Path, lines: List[str]): - """Generate requeue mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - if 'return ctrl.Result{}' in stripped and 'Requeue' not in stripped: - mutated = line.replace('ctrl.Result{}', 'ctrl.Result{Requeue: true}', 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='requeue', - description='Add unnecessary requeue', - pattern='add-requeue' - ) - - if 'Requeue: true' in stripped: - mutated = line.replace('Requeue: true', 'Requeue: false', 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='requeue', - description='Remove requeue flag', - pattern='remove-requeue' - ) - - if 'RequeueAfter:' in stripped: - mutated = re.sub(r'RequeueAfter:\s*[^,}]+', 'RequeueAfter: 0', line) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='requeue', - description='Set RequeueAfter to zero', - pattern='requeue-timing-zero' - ) - - def _mutate_status(self, file_path: Path, lines: List[str]): - """Generate status mutations.""" - for line_num, line in enumerate(lines, 1): - stripped = line.strip() - - if '.Status().Update(' in stripped or '.Status().Patch(' in stripped: - indent = len(line) - len(line.lstrip()) - mutated = ' ' * indent + '// MUTANT: Skipped status update - ' + stripped + '\n' - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='status', - description='Skip status update', - pattern='skip-status-update' - ) - - if 'corev1.ConditionTrue' in stripped: - 
mutated = line.replace('corev1.ConditionTrue', 'corev1.ConditionFalse', 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='status', - description='Change condition to False', - pattern='condition-value' - ) - - if 'corev1.ConditionFalse' in stripped: - mutated = line.replace('corev1.ConditionFalse', 'corev1.ConditionTrue', 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='status', - description='Change condition to True', - pattern='condition-value' - ) - - def _mutate_api_calls(self, file_path: Path, lines: List[str]): - """Generate API call mutations with signature-compatible replacements.""" - # Only include mutations with compatible signatures and option types - # Note: Delete→DeleteAllOf removed due to option type incompatibility - # (DeleteOption vs DeleteAllOfOption) and semantic mismatch - api_mutations = [ - ('r.Create(', 'r.Update(', 'Change Create to Update'), - ('r.Update(', 'r.Create(', 'Change Update to Create'), - ] - - for line_num, line in enumerate(lines, 1): - for old_call, new_call, description in api_mutations: - if old_call in line: - mutated = line.replace(old_call, new_call, 1) - self._add_mutation( - file_path=file_path, - line_num=line_num, - original_line=line, - mutated_line=mutated, - mutation_type='api-calls', - description=description, - pattern='api-operation-type' - ) - break - - def _add_mutation(self, file_path: Path, line_num: int, original_line: str, - mutated_line: str, mutation_type: str, description: str, pattern: str): - """Add mutation to list.""" - self.mutant_counter += 1 - - mutation = { - 'id': f'mutant-{self.mutant_counter:03d}', - 'type': mutation_type, - 'description': description, - 'file': str(file_path.relative_to(self.operator_path)), - 'line': line_num, - 'pattern': pattern, - 'original': original_line.rstrip('\n'), - 'mutated': 
mutated_line.rstrip('\n') - } - - self.mutations.append(mutation) - - def _generate_summary(self) -> Dict[str, Any]: - """Generate summary.""" - mutations_by_type = {} - for mutation in self.mutations: - mut_type = mutation['type'] - mutations_by_type[mut_type] = mutations_by_type.get(mut_type, 0) + 1 - - return { - 'total_mutations': len(self.mutations), - 'mutations_by_type': mutations_by_type, - 'mutations': self.mutations, - 'operator_path': str(self.operator_path) - } - - -def main(): - parser = argparse.ArgumentParser( - description='Generate efficient mutation metadata for operators' - ) - parser.add_argument( - '--operator-path', - default='.', - help='Path to operator repository' - ) - parser.add_argument( - '--mutation-types', - default='all', - help='Comma-separated mutation types or "all"' - ) - parser.add_argument( - '--output', - default='.work/mutation-testing/mutations.json', - help='Output file for mutation metadata' - ) - - args = parser.parse_args() - - # Parse mutation types - if args.mutation_types == 'all': - mutation_types = ['all'] - else: - mutation_types = [t.strip() for t in args.mutation_types.split(',')] - - # Generate mutations (metadata only) - generator = EfficientMutationGenerator( - operator_path=args.operator_path, - mutation_types=mutation_types - ) - - summary = generator.generate() - - # Save to file - output_path = Path(args.output) - output_path.parent.mkdir(parents=True, exist_ok=True) - - with open(output_path, 'w') as f: - json.dump(summary, f, indent=2) - - # Print summary - print("\n" + "="*60) - print("Mutation Generation Summary") - print("="*60) - print(f"Total Mutations: {summary['total_mutations']}") - print("\nBy Type:") - for mut_type, count in summary['mutations_by_type'].items(): - print(f" {mut_type:20s}: {count:3d}") - print(f"\nMetadata saved to: {output_path}") - print("="*60) - print("\n💡 TIP: No repository copies created - mutations applied in-place during testing!") - - return 0 - - -if __name__ == 
'__main__': - sys.exit(main()) - diff --git a/plugins/testing/skills/mutation-tester/SKILL.md b/plugins/testing/skills/mutation-tester/SKILL.md deleted file mode 100644 index 719e13544..000000000 --- a/plugins/testing/skills/mutation-tester/SKILL.md +++ /dev/null @@ -1,584 +0,0 @@ ---- -name: Mutation Testing Executor -description: Execute tests against generated mutants and analyze results to validate test suite quality for Kubernetes operators ---- - -# Mutation Testing Executor - -This skill executes the test suite against each generated mutant and analyzes the results to determine test suite effectiveness. A killed mutant (test fails) indicates good test coverage; a survived mutant (test passes) indicates a gap in testing. - -## When to Use This Skill - -Use this skill when: -- You have generated mutants and need to test them -- You want to calculate mutation score for operator controllers -- You're analyzing test suite quality -- You need to identify missing test cases - -## Prerequisites - -1. **Generated mutants** from mutation-generator skill -2. **Working test suite** that passes on original code -3. **Go toolchain** installed (go test) -4. **Sufficient time** - mutation testing is computationally expensive - -## Implementation Steps - -### Step 1: Validate Baseline Tests - -Before testing mutants, ensure original tests pass: - -**1.1 Run Baseline Tests** - -```bash -echo "Running baseline tests against original code..." -cd "${OPERATOR_PATH}" - -go test ./controllers/... -v -timeout 10m \ - > "${WORK_DIR}/baseline-test-output.txt" 2>&1 - -BASELINE_EXIT=$? 
- -if [ $BASELINE_EXIT -ne 0 ]; then - echo "❌ ERROR: Baseline tests failed" - echo "Fix failing tests before running mutation testing" - cat "${WORK_DIR}/baseline-test-output.txt" - exit 1 -fi - -echo "✓ Baseline tests passed" -``` - -**1.2 Record Baseline Metrics** - -```bash -# Count tests -TOTAL_TESTS=$(grep -c "^=== RUN" "${WORK_DIR}/baseline-test-output.txt" || echo "0") - -# Execution time -TEST_DURATION=$(grep "^PASS" "${WORK_DIR}/baseline-test-output.txt" | \ - awk '{print $NF}' | sed 's/[^0-9.]//g' | head -1) - -echo "Baseline: $TOTAL_TESTS tests in ${TEST_DURATION}s" -``` - ---- - -### Step 2: Test Each Mutant - -**2.1 Iterate Through Mutants** - -```bash -MUTANTS_DIR="${WORK_DIR}/mutants" -RESULTS_DIR="${WORK_DIR}/results" -mkdir -p "$RESULTS_DIR" - -# Initialize counters -TOTAL_MUTANTS=$(find "$MUTANTS_DIR" -mindepth 1 -maxdepth 1 -type d | wc -l) -KILLED_MUTANTS=0 -SURVIVED_MUTANTS=0 -TIMEOUT_MUTANTS=0 -ERROR_MUTANTS=0 - -echo "" -echo "Testing $TOTAL_MUTANTS mutants..." -echo "This may take a while (estimated: $((TOTAL_MUTANTS * TEST_DURATION / 60)) minutes)" -echo "" - -CURRENT=0 - -for mutant_dir in "$MUTANTS_DIR"/mutant-*; do - CURRENT=$((CURRENT + 1)) - MUTANT_ID=$(basename "$mutant_dir") - - # Load mutation metadata - MUTATION_FILE="$mutant_dir/MUTATION.json" - if [ ! -f "$MUTATION_FILE" ]; then - echo "[$CURRENT/$TOTAL_MUTANTS] ⚠️ $MUTANT_ID: No metadata" - continue - fi - - MUTATION_DESC=$(jq -r '.description' "$MUTATION_FILE") - MUTATION_TYPE=$(jq -r '.type' "$MUTATION_FILE") - - # Progress indicator - echo -n "[$CURRENT/$TOTAL_MUTANTS] Testing $MUTANT_ID ($MUTATION_TYPE)... " - - # Test mutant (with timeout) - cd "$mutant_dir" - - # Use timeout to prevent hanging tests - timeout 300s go test ./controllers/... -v \ - > "$RESULTS_DIR/${MUTANT_ID}-output.txt" 2>&1 - - TEST_EXIT=$? 
- cd - > /dev/null - - # Analyze result - if [ $TEST_EXIT -eq 124 ]; then - # Timeout occurred - echo "⏱️ KILLED (timeout)" - KILLED_MUTANTS=$((KILLED_MUTANTS + 1)) - TIMEOUT_MUTANTS=$((TIMEOUT_MUTANTS + 1)) - STATUS="killed-timeout" - - elif [ $TEST_EXIT -ne 0 ]; then - # Tests failed - mutant killed (GOOD!) - echo "✓ KILLED" - KILLED_MUTANTS=$((KILLED_MUTANTS + 1)) - STATUS="killed" - - else - # Tests passed - mutant survived (BAD - indicates missing test) - echo "⚠️ SURVIVED" - SURVIVED_MUTANTS=$((SURVIVED_MUTANTS + 1)) - STATUS="survived" - fi - - # Save result metadata (using jq for safe JSON construction) - jq -n \ - --arg mutant_id "$MUTANT_ID" \ - --arg status "$STATUS" \ - --argjson exit_code "$TEST_EXIT" \ - --arg output_file "$RESULTS_DIR/${MUTANT_ID}-output.txt" \ - --slurpfile mutation "$MUTATION_FILE" \ - '{ - mutant_id: $mutant_id, - status: $status, - exit_code: $exit_code, - mutation: $mutation[0], - output_file: $output_file - }' > "$RESULTS_DIR/${MUTANT_ID}-result.json" - -done - -cd "$OPERATOR_PATH" -``` - -**2.2 Display Progress** - -For long-running mutation testing, show periodic updates: - -```bash -# Every 10 mutants, show summary -if [ $((CURRENT % 10)) -eq 0 ] && [ "$CURRENT" -gt 0 ]; then - MUTATION_SCORE=$(awk "BEGIN {printf \"%.1f\", ($KILLED_MUTANTS / $CURRENT) * 100}") - echo "" - echo " Progress: $CURRENT/$TOTAL_MUTANTS ($MUTATION_SCORE% killed so far)" - echo "" -fi -``` - ---- - -### Step 3: Calculate Mutation Score - -**3.1 Compute Overall Score** - -```bash -# Mutation Score = (Killed / Total) * 100 -# Check for zero or unset TOTAL_MUTANTS to avoid division by zero -if [ -z "$TOTAL_MUTANTS" ] || [ "$TOTAL_MUTANTS" -eq 0 ]; then - echo "❌ ERROR: No mutants generated (TOTAL_MUTANTS=$TOTAL_MUTANTS)" - exit 1 -fi - -MUTATION_SCORE=$(awk "BEGIN {printf \"%.2f\", ($KILLED_MUTANTS / $TOTAL_MUTANTS) * 100}") - -echo "" -echo "════════════════════════════════════════════════════════════" -echo " MUTATION TESTING RESULTS" -echo 
"════════════════════════════════════════════════════════════" -echo "" -echo "Total Mutants: $TOTAL_MUTANTS" -echo "Killed (Good): $KILLED_MUTANTS" -echo " - By tests: $((KILLED_MUTANTS - TIMEOUT_MUTANTS))" -echo " - By timeout: $TIMEOUT_MUTANTS" -echo "Survived (Bad): $SURVIVED_MUTANTS" -echo "" -echo "Mutation Score: ${MUTATION_SCORE}%" -echo "" -``` - -**3.2 Interpret Score** - -```bash -if (( $(echo "$MUTATION_SCORE >= 90" | bc -l) )); then - echo "✓✓ EXCELLENT - Strong test suite!" - VERDICT="excellent" -elif (( $(echo "$MUTATION_SCORE >= 80" | bc -l) )); then - echo "✓ GOOD - Solid test coverage" - VERDICT="good" -elif (( $(echo "$MUTATION_SCORE >= 70" | bc -l) )); then - echo "⚠️ FAIR - Room for improvement" - VERDICT="fair" -else - echo "❌ POOR - Significant gaps in test coverage" - VERDICT="poor" -fi - -echo "════════════════════════════════════════════════════════════" -echo "" -``` - -**3.3 Calculate Score by Mutation Type** - -```bash -# Analyze by mutation type -echo "Mutation Score by Type:" -echo "" - -for mut_type in conditionals error-handling returns requeue status api-calls; do - TYPE_TOTAL=$(jq -r "select(.mutation.type == \"$mut_type\") | .mutant_id" \ - "$RESULTS_DIR"/*-result.json 2>/dev/null | wc -l) - - if [ "$TYPE_TOTAL" -gt 0 ]; then - TYPE_KILLED=$(jq -r "select(.mutation.type == \"$mut_type\" and .status == \"killed\") | .mutant_id" \ - "$RESULTS_DIR"/*-result.json 2>/dev/null | wc -l) - - TYPE_SCORE=$(awk "BEGIN {printf \"%.1f\", ($TYPE_KILLED / $TYPE_TOTAL) * 100}") - - printf " %-20s: %5.1f%% (%d/%d)\n" "$mut_type" "$TYPE_SCORE" "$TYPE_KILLED" "$TYPE_TOTAL" - fi -done - -echo "" -``` - ---- - -### Step 4: Analyze Survived Mutants - -**4.1 Identify High-Priority Survived Mutants** - -```bash -echo "Analyzing survived mutants..." 
-echo "" - -# Critical types that should not survive -CRITICAL_TYPES=("error-handling" "conditionals") - -# List survived mutants -SURVIVED_MUTANTS_LIST=() - -for result_file in "$RESULTS_DIR"/*-result.json; do - STATUS=$(jq -r '.status' "$result_file") - - if [ "$STATUS" == "survived" ]; then - MUTANT_ID=$(jq -r '.mutant_id' "$result_file") - SURVIVED_MUTANTS_LIST+=("$MUTANT_ID") - fi -done - -if [ ${#SURVIVED_MUTANTS_LIST[@]} -eq 0 ]; then - echo "✓ No survived mutants - all mutations were caught by tests!" -else - echo "⚠️ ${#SURVIVED_MUTANTS_LIST[@]} survived mutants need attention:" - echo "" - - # Show top 10 most critical - COUNT=0 - for mutant_id in "${SURVIVED_MUTANTS_LIST[@]}"; do - if [ $COUNT -ge 10 ]; then - echo " ... and $((${#SURVIVED_MUTANTS_LIST[@]} - 10)) more" - break - fi - - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - - TYPE=$(jq -r '.mutation.type' "$RESULT_FILE") - DESC=$(jq -r '.mutation.description' "$RESULT_FILE") - FILE=$(jq -r '.mutation.file' "$RESULT_FILE") - LINE=$(jq -r '.mutation.line' "$RESULT_FILE") - - echo " $mutant_id:" - echo " Type: $TYPE" - echo " Location: $FILE:$LINE" - echo " Mutation: $DESC" - echo "" - - COUNT=$((COUNT + 1)) - done -fi -``` - -**4.2 Generate Recommendations** - -For each survived mutant, suggest what test to add: - -```bash -echo "Recommendations:" -echo "" - -for mutant_id in "${SURVIVED_MUTANTS_LIST[@]:0:5}"; do # Top 5 - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - - TYPE=$(jq -r '.mutation.type' "$RESULT_FILE") - FILE=$(jq -r '.mutation.file' "$RESULT_FILE") - PATTERN=$(jq -r '.mutation.pattern' "$RESULT_FILE") - - case "$TYPE" in - "error-handling") - echo "→ $mutant_id: Add test case for error handling in $FILE" - echo " Suggestion: Test what happens when API call fails" - ;; - "conditionals") - echo "→ $mutant_id: Add test for opposite condition in $FILE" - echo " Suggestion: Test both true and false branches" - ;; - "requeue") - echo "→ $mutant_id: Validate requeue behavior 
in $FILE" - echo " Suggestion: Assert Result.Requeue value in test" - ;; - "status") - echo "→ $mutant_id: Verify status updates in $FILE" - echo " Suggestion: Assert object status after reconciliation" - ;; - esac - echo "" -done -``` - ---- - -### Step 5: Generate Reports - -**5.1 Create JSON Report** - -```bash -# Compile all results into single JSON (using jq for safe construction) -# First, collect survived mutant result files -SURVIVED_JSON_FILES=() -for mutant_id in "${SURVIVED_MUTANTS_LIST[@]}"; do - if [ -f "$RESULTS_DIR/${mutant_id}-result.json" ]; then - SURVIVED_JSON_FILES+=("$RESULTS_DIR/${mutant_id}-result.json") - fi -done - -# Build report with jq -jq -n \ - --argjson total "$TOTAL_MUTANTS" \ - --argjson killed "$KILLED_MUTANTS" \ - --argjson survived "$SURVIVED_MUTANTS" \ - --argjson timeout "$TIMEOUT_MUTANTS" \ - --argjson score "$MUTATION_SCORE" \ - --arg verdict "$VERDICT" \ - --argjson duration "$TEST_DURATION" \ - --arg timestamp "$(date -Iseconds)" \ - --slurpfile survived_mutants <(jq -s '.' "${SURVIVED_JSON_FILES[@]}" 2>/dev/null || echo '[]') \ - '{ - summary: { - total_mutants: $total, - killed: $killed, - survived: $survived, - timeout: $timeout, - mutation_score: $score, - verdict: $verdict, - test_duration_seconds: $duration, - timestamp: $timestamp - }, - survived_mutants: $survived_mutants[0] - }' > "$RESULTS_DIR/mutation-report.json" -``` - -**5.2 Generate HTML Report** - -Create visual HTML report (simplified version): - -```bash -cat > "$RESULTS_DIR/mutation-report.html" << 'EOF' - - - - Mutation Testing Report - - - -
-

🧬 Mutation Testing Report

- -
-
-
$MUTATION_SCORE%
-
Mutation Score
-
-
-
$KILLED_MUTANTS
-
Killed (Good)
-
-
-
$SURVIVED_MUTANTS
-
Survived (Bad)
-
-
-
$TOTAL_MUTANTS
-
Total Mutants
-
-
- -

Verdict: $(echo $VERDICT | tr '[:lower:]' '[:upper:]')

- -

Survived Mutants (Need Attention)

- - - - - - - -EOF - -# Add survived mutants to table -for mutant_id in "${SURVIVED_MUTANTS_LIST[@]}"; do - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - TYPE=$(jq -r '.mutation.type' "$RESULT_FILE") - FILE=$(jq -r '.mutation.file' "$RESULT_FILE") - LINE=$(jq -r '.mutation.line' "$RESULT_FILE") - DESC=$(jq -r '.mutation.description' "$RESULT_FILE") - - cat >> "$RESULTS_DIR/mutation-report.html" << EOF - - - - - - -EOF -done - -cat >> "$RESULTS_DIR/mutation-report.html" << 'EOF' -
IDTypeLocationDescription
$mutant_id$TYPE$FILE:$LINE$DESC
-
- - -EOF - -echo "📊 HTML Report: file://$(realpath "$RESULTS_DIR/mutation-report.html")" -``` - -**5.3 Generate Markdown Report** - -```bash -cat > "$RESULTS_DIR/mutation-report.md" << EOF -# Mutation Testing Report - -**Date:** $(date '+%Y-%m-%d %H:%M:%S') - -## Summary - -| Metric | Value | -|--------|-------| -| **Mutation Score** | **${MUTATION_SCORE}%** | -| Total Mutants | $TOTAL_MUTANTS | -| Killed (Good) | $KILLED_MUTANTS | -| Survived (Bad) | $SURVIVED_MUTANTS | -| Verdict | ${VERDICT^^} | - -## Mutation Score by Type - -EOF - -# Add score by type table -for mut_type in conditionals error-handling returns requeue status api-calls; do - TYPE_TOTAL=$(jq -r "select(.mutation.type == \"$mut_type\") | .mutant_id" \ - "$RESULTS_DIR"/*-result.json 2>/dev/null | wc -l) - - if [ "$TYPE_TOTAL" -gt 0 ]; then - TYPE_KILLED=$(jq -r "select(.mutation.type == \"$mut_type\" and .status == \"killed\") | .mutant_id" \ - "$RESULTS_DIR"/*-result.json 2>/dev/null | wc -l) - TYPE_SCORE=$(awk "BEGIN {printf \"%.1f\", ($TYPE_KILLED / $TYPE_TOTAL) * 100}") - - echo "| $mut_type | ${TYPE_SCORE}% | $TYPE_KILLED/$TYPE_TOTAL |" >> "$RESULTS_DIR/mutation-report.md" - fi -done - -cat >> "$RESULTS_DIR/mutation-report.md" << 'EOF' - -## Survived Mutants - -The following mutants survived, indicating gaps in test coverage: - -| Mutant ID | Type | Location | Description | -|-----------|------|----------|-------------| -EOF - -for mutant_id in "${SURVIVED_MUTANTS_LIST[@]}"; do - RESULT_FILE="$RESULTS_DIR/${mutant_id}-result.json" - TYPE=$(jq -r '.mutation.type' "$RESULT_FILE") - FILE=$(jq -r '.mutation.file' "$RESULT_FILE") - LINE=$(jq -r '.mutation.line' "$RESULT_FILE") - DESC=$(jq -r '.mutation.description' "$RESULT_FILE") - - echo "| $mutant_id | $TYPE | $FILE:$LINE | $DESC |" >> "$RESULTS_DIR/mutation-report.md" -done - -echo "" >> "$RESULTS_DIR/mutation-report.md" -``` - ---- - -## Performance Optimization - -**Parallel Testing** - -For faster mutation testing, test mutants in parallel: 
- -```bash -# Use GNU parallel if available -if command -v parallel &> /dev/null; then - export -f test_single_mutant - - ls "$MUTANTS_DIR"/mutant-* | \ - parallel -j 4 --bar test_single_mutant {} -fi -``` - -**Incremental Testing** - -Only test new mutants if running repeatedly: - -```bash -# Skip already-tested mutants -for mutant_dir in "$MUTANTS_DIR"/mutant-*; do - MUTANT_ID=$(basename "$mutant_dir") - RESULT_FILE="$RESULTS_DIR/${MUTANT_ID}-result.json" - - if [ -f "$RESULT_FILE" ]; then - echo "Skipping $MUTANT_ID (already tested)" - continue - fi - - # Test mutant... -done -``` - -## Output Files - -- `mutation-report.json` - Complete results in JSON format -- `mutation-report.html` - Visual HTML report (default) -- `mutation-report.md` - Markdown report for PRs -- `{mutant-id}-output.txt` - Test output for each mutant -- `{mutant-id}-result.json` - Result metadata for each mutant - -## See Also - -- [mutation-generator skill](../mutation-generator/SKILL.md) -- [Mutation Testing Best Practices](https://pedrorijo.com/blog/intro-mutation/) - diff --git a/plugins/utils/.claude-plugin/plugin.json b/plugins/utils/.claude-plugin/plugin.json index 424a4f989..4a311da77 100644 --- a/plugins/utils/.claude-plugin/plugin.json +++ b/plugins/utils/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "utils", "description": "A generic utilities plugin serving as a catch-all for various helper commands and agents", - "version": "0.0.9", + "version": "0.0.10", "author": { "name": "github.com/openshift-eng" } diff --git a/plugins/utils/commands/placeholder.md b/plugins/utils/commands/placeholder.md deleted file mode 100644 index aa9980dcc..000000000 --- a/plugins/utils/commands/placeholder.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -description: Placeholder command for the utils plugin ---- - -## Name -utils:placeholder - -## Synopsis -``` -/utils:placeholder -``` - -## Description -This is a placeholder command for the utils plugin. 
The utils plugin serves as a catch-all location for introducing new generic commands. Once enough related commands are accumulated, they can be segregated into more targeted, specialized plugins. - -This placeholder exists to maintain the plugin structure and will be replaced with actual utility commands as they are developed. - -## Implementation -The utils plugin provides a home for: -- Generic helper commands that don't fit into existing specialized plugins -- Experimental commands that may later be moved to dedicated plugins -- Common utilities that benefit multiple workflows -- Commands that are waiting to be grouped with similar functionality - -## Arguments: -None diff --git a/plugins/yaml/.claude-plugin/plugin.json b/plugins/yaml/.claude-plugin/plugin.json deleted file mode 100644 index 8dcd1826c..000000000 --- a/plugins/yaml/.claude-plugin/plugin.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "name": "yaml", - "version": "0.0.2", - "description": "Generate comprehensive YAML documentation from Go struct definitions with sensible default values", - "author": { - "name": "saswatamcode" - }, - "license": "Apache-2.0" -} diff --git a/plugins/yaml/README.md b/plugins/yaml/README.md deleted file mode 100644 index 2b203cc54..000000000 --- a/plugins/yaml/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# YAML Plugin - -YAML documentation and utilities for Claude Code. - -## Commands - -### `/yaml:docs` - -Generate or query documentation for YAML files and structures. - -See [commands/docs.md](commands/docs.md) for full documentation. 
- -## Installation - -```bash -/plugin install yaml@ai-helpers -``` - diff --git a/plugins/yaml/commands/docs.md b/plugins/yaml/commands/docs.md deleted file mode 100644 index 3683dbe65..000000000 --- a/plugins/yaml/commands/docs.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -description: Generate comprehensive YAML documentation from Go struct definitions with sensible default values -argument-hint: "[file:StructName] [output.md]" ---- - -## Name -yaml:docs - -## Synopsis -``` -/yaml:docs [file:StructName] [output.md] -``` - -## Description -The `yaml:docs` command generates comprehensive YAML documentation from Go struct definitions. It analyzes Go structs and produces complete, well-documented YAML configuration examples with intelligent default values for all fields. - -This command is designed to help developers quickly create YAML configuration documentation by: -- Automatically generating sensible default values for all struct fields -- Adding inline comments explaining each field's purpose and constraints -- Maintaining proper YAML formatting and structure -- Supporting nested structs, slices, maps, and complex types -- Respecting struct tags (yaml, json, validate, default) - -The spec sections is inspired by https://man7.org/linux/man-pages/man7/man-pages.7.html#top_of_page - -## Implementation - -You are a specialized tool for generating comprehensive YAML documentation from Go struct definitions. - -### Task - -Analyze the provided Go struct and generate complete YAML documentation with: -- All fields populated with intelligent, sensible default values (never leave fields empty) -- Inline comments explaining each field's purpose and constraints -- Proper YAML formatting and structure -- Nested YAML for embedded structs with all sub-fields populated - -### Input Handling - -The user may provide input in these formats: -1. `$1 $2` - File path with struct name (e.g., `pkg/api/types.go:MetricsConfig`) and optional output file path -2. 
`$1` - Just the file path with struct name -3. Selected code containing a Go struct definition (no arguments) - -### Instructions - -1. **Locate the struct:** - - If a file path is provided (format: `file.go:StructName`), read that file and find the specified struct - - If code is selected, use the selected Go struct definition - - Search for the struct definition and any embedded struct types - -2. **Analyze struct metadata:** - - Examine struct tags: `yaml`, `json`, `validate`, `default` - - Note validation constraints (min, max, required, etc.) - - Identify field types (strings, ints, bools, slices, maps, nested structs, pointers) - - Preserve field ordering from the struct definition - -3. **Generate intelligent defaults:** - - **Strings**: Use contextually appropriate values based on field names (e.g., "localhost" for host, "info" for log level) - - **Integers**: Use common sensible values (e.g., 8080 for port, 30 for timeout seconds) - - **Booleans**: Default to `false` unless the field name suggests otherwise - - **Durations**: Use human-readable format (e.g., "30s", "5m", "1h") - - **Slices**: Provide 1-2 example values in array format - - **Maps**: Provide 1-2 example key-value pairs - - **Nested structs**: Recursively populate all sub-fields - - **Pointers**: Treat as optional but still provide example values - -4. **Format the output:** - - Use proper YAML indentation (2 spaces) - - Add inline comments with `#` explaining each field - - Include validation constraints in comments where applicable - - Add section headers for major struct groups - - Ensure valid YAML syntax - -5. 
**Write the output:** - - If an output file path is provided as `$2`, use the Write tool to create that file with the generated YAML content (write pure YAML, not markdown) - - Otherwise, display the generated YAML to the user in a markdown code block with yaml syntax highlighting - -### Important Behaviors - -- **ALWAYS populate all fields** - never leave fields empty or use placeholder text -- Infer contextually appropriate defaults from field names and types -- Include helpful comments explaining what each field does -- Maintain the struct's field order in the YAML output -- Handle complex nested structures by recursively applying these rules - -## Return Value -- **Claude agent text**: Generated YAML documentation with intelligent defaults and inline comments -- **File output** (if $2 provided): YAML file written to the specified path - -## Examples - -### Example 1: Basic usage with file path and struct name -``` -/yaml:docs pkg/config/server.go:ServerConfig -``` - -Input struct: -```go -type ServerConfig struct { - Host string `yaml:"host" json:"host" validate:"required"` - Port int `yaml:"port" json:"port" validate:"min=1,max=65535"` - Timeout time.Duration `yaml:"timeout" json:"timeout"` - Debug bool `yaml:"debug" json:"debug"` - Features []string `yaml:"features" json:"features"` -} -``` - -Output: -```yaml -# Server configuration -host: "localhost" # Required: Server hostname or IP address -port: 8080 # Port number (1-65535) -timeout: "30s" # Request timeout duration -debug: false # Enable debug logging -features: ["metrics", "tracing"] # List of enabled features -``` - -### Example 2: Complex nested structs with output file -``` -/yaml:docs pkg/config/database.go:DatabaseConfig config/database.yaml -``` - -Input struct: -```go -type DatabaseConfig struct { - Host string `yaml:"host"` - Port int `yaml:"port"` - SSL SSLConfig `yaml:"ssl"` - Pools map[string]int `yaml:"pools"` - Metadata *MetadataConfig `yaml:"metadata,omitempty"` -} - -type SSLConfig 
struct { - Enabled bool `yaml:"enabled"` - CertFile string `yaml:"cert_file"` - KeyFile string `yaml:"key_file"` -} -``` - -Generated YAML (written to config/database.yaml): -```yaml -# Database configuration -host: "localhost" # Database host -port: 5432 # Database port -ssl: # SSL configuration - enabled: true # Enable SSL connection - cert_file: "/etc/ssl/certs/db.crt" # SSL certificate file path - key_file: "/etc/ssl/private/db.key" # SSL private key file path -pools: # Connection pools configuration - read: 10 # Read connection pool size - write: 5 # Write connection pool size -metadata: # Optional metadata configuration - cache_ttl: "1h" # Cache time-to-live - sync_interval: "5m" # Sync interval -``` - -### Example 3: Using with selected code -Select a Go struct definition in your editor, then run: -``` -/yaml:docs -``` - -The command will generate YAML documentation from the selected struct. - -## Arguments -- $1: File path and struct name in format `file.go:StructName` (e.g., `pkg/api/types.go:MetricsConfig`), or selected code containing a Go struct definition -- $2: (Optional) Output file path where the generated YAML will be written (e.g., `config/example.yaml`)