From e7a4dfabf558e0bf9b9f6df145e1e4703602756c Mon Sep 17 00:00:00 2001 From: richfrem Date: Thu, 5 Mar 2026 07:59:17 +0900 Subject: [PATCH 1/6] updating base plugins --- .../adr-manager/.claude-plugin/plugin.json | 12 +- .../skills/adr-management/SKILL.md | 1 + .../skills/adr-management/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../agent-loops/.claude-plugin/plugin.json | 14 +- .../agent-loops/skills/agent-swarm/SKILL.md | 2 + .../skills/agent-swarm/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 12 + .../agent-swarm/references/fallback-tree.md | 18 + plugins/agent-loops/skills/dual-loop/SKILL.md | 1 + .../skills/dual-loop/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 13 + .../dual-loop/references/fallback-tree.md | 17 + .../agent-loops/skills/learning-loop/SKILL.md | 1 + .../skills/learning-loop/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 12 + .../learning-loop/references/fallback-tree.md | 17 + .../agent-loops/skills/orchestrator/SKILL.md | 1 + .../skills/orchestrator/evals/evals.json | 30 ++ .../orchestrator/references/fallback-tree.md | 17 + .../skills/red-team-review/SKILL.md | 3 + .../skills/red-team-review/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 12 + .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 16 +- plugins/agent-plugin-analyzer/README.md | 14 + .../agents/l5-red-team-auditor.md | 77 ++++ .../research/anthropic-skills-learnings.md | 35 ++ .../research/pdf-skill-learnings.md | 26 ++ .../skills/analyze-plugin/SKILL.md | 4 + .../skills/analyze-plugin/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../references/security-checks.md | 3 + .../skills/audit-plugin-l5/CONNECTORS.md | 1 + .../skills/audit-plugin-l5/SKILL.md | 39 ++ .../audit-plugin-l5/audit-plugin-l5-flow.mmd | 5 + .../skills/audit-plugin-l5/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 1 + .../references/architecture.md | 1 + 
.../references/fallback-tree.md | 17 + .../skills/audit-plugin-l5/scripts/execute.py | 16 + .../skills/synthesize-learnings/SKILL.md | 4 + .../synthesize-learnings/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 30 +- .../references/hitl-interaction-design.md | 1 + .../references/pattern-decision-matrix.md | 67 ++- plugins/agent-scaffolders/scripts/scaffold.py | 24 +- .../skills/audit-plugin/SKILL.md | 3 + .../skills/audit-plugin/evals/evals.json | 30 ++ .../audit-plugin/references/fallback-tree.md | 17 + .../skills/create-agentic-workflow/SKILL.md | 1 + .../create-agentic-workflow/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../create-azure-agent/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-docker-skill/SKILL.md | 1 + .../create-docker-skill/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-github-action/SKILL.md | 1 + .../create-github-action/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-hook/SKILL.md | 1 + .../skills/create-hook/evals/evals.json | 30 ++ .../create-hook/references/fallback-tree.md | 17 + .../skills/create-legacy-command/SKILL.md | 1 + .../create-legacy-command/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-mcp-integration/SKILL.md | 1 + .../create-mcp-integration/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-plugin/SKILL.md | 14 +- .../skills/create-plugin/evals/evals.json | 30 ++ .../create-plugin/references/fallback-tree.md | 17 + .../skills/create-skill/SKILL.md | 25 +- .../skills/create-skill/evals/evals.json | 30 ++ .../create-skill/references/fallback-tree.md | 17 + .../references/hitl-interaction-design.md | 177 -------- .../skills/create-stateful-skill/SKILL.md | 3 + .../create-stateful-skill/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/create-sub-agent/SKILL.md | 1 + 
.../skills/create-sub-agent/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 15 +- ...artifact-generation-xss-compliance-gate.md | 25 ++ .../asynchronous-benchmark-metric-capture.md | 25 ++ .../client-side-compute-sandbox-constraint.md | 26 ++ .../delegated-constraint-verification-loop.md | 25 ++ .../dynamic-specification-fetching.md | 26 ++ .../explicit-seed-anchored-determinism.md | 26 ++ .../highly-procedural-fallback-trees.md | 26 ++ .../iteration-directory-isolation.md | 26 ++ .../local-interactive-output-viewer-loop.md | 28 ++ .../multi-variant-trigger-optimizer.md | 27 ++ .../negative-instruction-constraint.md | 36 ++ .../passive-style-injection-payload.md | 31 ++ .../rigorous-benchmarking-loop.md | 28 ++ .../tainted-context-cleanser.md | 26 ++ .../trigger-description-optimization-loop.md | 28 ++ .../ui-degradation-constraint.md | 28 ++ .../ecosystem-authoritative-sources/SKILL.md | 1 + .../evals/evals.json | 30 ++ .../reference/skill-evaluation-and-testing.md | 45 +++ .../references/fallback-tree.md | 17 + .../skills/ecosystem-standards/SKILL.md | 1 + .../ecosystem-standards/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../{ => .claude-plugin}/plugin.json | 8 +- .../skills/claude-cli-agent/SKILL.md | 1 + .../skills/claude-cli-agent/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 17 + .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 14 +- .../skills/coding-conventions/SKILL.md | 1 + .../coding-conventions/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/conventions-agent/SKILL.md | 1 + .../skills/conventions-agent/evals/evals.json | 24 ++ .../references/fallback-tree.md | 13 + .../.claude-plugin/plugin.json | 21 +- .../skills/context-bundling/SKILL.md | 1 + .../skills/context-bundling/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../skills/zip-bundling/SKILL.md | 1 + .../skills/zip-bundling/evals/evals.json | 
30 ++ .../references/acceptance-criteria.md | 13 + .../zip-bundling/references/fallback-tree.md | 17 + .../copilot-cli/.claude-plugin/plugin.json | 13 + plugins/copilot-cli/plugin.json | 9 - .../skills/copilot-cli-agent/SKILL.md | 3 +- .../skills/copilot-cli-agent/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 17 + .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 13 +- .../skills/dependency-management/SKILL.md | 1 + .../dependency-management/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + plugins/env-helper/.claude-plugin/plugin.json | 13 + plugins/env-helper/README.md | 42 ++ plugins/env-helper/plugin.json | 12 - plugins/env-helper/skills/env-helper/SKILL.md | 49 +-- .../skills/env-helper/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 7 + .../env-helper/references/fallback-tree.md | 15 + .../excel-to-csv/.claude-plugin/plugin.json | 22 +- plugins/excel-to-csv/README.md | 10 +- .../excel-to-csv/skills/excel-to-csv/SKILL.md | 50 ++- .../skills/excel-to-csv/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 17 +- .../excel-to-csv/references/fallback-tree.md | 20 + .../skills/excel-to-csv/scripts/verify_csv.py | 85 ++++ plugins/gemini-cli/.claude-plugin/plugin.json | 13 + plugins/gemini-cli/plugin.json | 9 - .../skills/gemini-cli-agent/SKILL.md | 3 +- .../skills/gemini-cli-agent/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 17 + .../references/fallback-tree.md | 17 + .../{ => .claude-plugin}/plugin.json | 8 +- .../huggingface-utils/skills/hf-init/SKILL.md | 3 +- .../skills/hf-init/evals/evals.json | 24 ++ .../hf-init/references/acceptance-criteria.md | 14 + .../hf-init/references/fallback-tree.md | 17 + .../skills/hf-upload/SKILL.md | 3 +- .../skills/hf-upload/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 17 + .../hf-upload/references/fallback-tree.md | 17 + .../json-hygiene/.claude-plugin/plugin.json | 13 + plugins/json-hygiene/README.md | 4 
+- plugins/json-hygiene/plugin.json | 12 - .../skills/json-hygiene-agent/SKILL.md | 52 +-- .../json-hygiene-agent/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 7 + .../references/fallback-tree.md | 11 + .../scripts/find_json_duplicates.py | 104 +++-- .../link-checker/.claude-plugin/plugin.json | 13 +- .../skills/link-checker-agent/SKILL.md | 1 + .../link-checker-agent/evals/evals.json | 30 ++ .../references/fallback-tree.md | 17 + .../.claude-plugin/plugin.json | 15 +- .../markdown-to-msword-converter/README.md | 5 +- .../markdown-to-msword-converter/SKILL.md | 57 +-- .../evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 7 + .../references/fallback-tree.md | 19 + .../scripts/verify_docx.py | 84 ++++ .../{ => .claude-plugin}/plugin.json | 8 +- .../skills/memory-management/SKILL.md | 1 + .../skills/memory-management/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 20 + .../references/fallback-tree.md | 17 + .../mermaid-to-png/.claude-plugin/plugin.json | 18 +- .../skills/convert-mermaid/SKILL.md | 56 ++- .../skills/convert-mermaid/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 10 +- .../{ => references}/convert-mermaid-flow.mmd | 0 .../{ => references}/convert-mermaid-flow.png | Bin .../references/fallback-tree.md | 19 + .../mermaid-to-png-architecture.mmd | 0 .../mermaid-to-png-architecture.png | Bin .../{ => references}/reference.md | 0 .../convert-mermaid/scripts/verify_png.py | 72 ++++ .../{ => .claude-plugin}/plugin.json | 8 +- .../skills/obsidian-bases-manager/SKILL.md | 3 +- .../obsidian-bases-manager/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 13 + .../references/fallback-tree.md | 13 + .../skills/obsidian-canvas-architect/SKILL.md | 3 +- .../evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 14 + .../references/fallback-tree.md | 13 + .../skills/obsidian-graph-traversal/SKILL.md | 3 +- .../obsidian-graph-traversal/evals/evals.json | 24 ++ 
.../references/acceptance-criteria.md | 15 + .../references/fallback-tree.md | 13 + .../skills/obsidian-init/SKILL.md | 3 +- .../skills/obsidian-init/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 14 + .../obsidian-init/references/fallback-tree.md | 17 + .../skills/obsidian-markdown-mastery/SKILL.md | 3 +- .../evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 14 + .../references/fallback-tree.md | 13 + .../skills/obsidian-vault-crud/SKILL.md | 3 +- .../obsidian-vault-crud/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 18 + .../references/fallback-tree.md | 17 + .../plugin-manager/.claude-plugin/plugin.json | 18 +- plugins/plugin-manager/README.md | 93 ++--- plugins/plugin-manager/commands/cleanup.md | 20 +- plugins/plugin-manager/commands/install.md | 49 ++- plugins/plugin-manager/commands/update.md | 33 +- .../plugin-manager/resources/cleanup_flow.mmd | 72 ---- .../resources/plugin_replicator_diagram.mmd | 27 -- .../resources/plugin_replicator_overview.md | 26 -- .../plugin-manager/scripts/bulk_replicator.py | 126 +++--- .../scripts/plugin_bootstrap.py | 19 +- .../scripts/plugin_replicator.py | 231 +++++++---- .../scripts/update_agent_system.py | 69 ++-- .../skills/agent-bridge/SKILL.md | 45 --- .../skills/ecosystem-cleanup-sync/SKILL.md | 67 --- .../skills/plugin-bootstrap/SKILL.md | 66 --- .../skills/plugin-maintenance/SKILL.md | 165 ++++++-- .../plugin-maintenance/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 21 + .../references/cleanup_flow.mmd | 67 +++ .../references}/cleanup_process.md | 3 +- .../references/fallback-tree.md | 20 + .../skills/plugin-replicator/SKILL.md | 114 ++++-- .../skills/plugin-replicator/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 23 ++ .../references/fallback-tree.md | 19 + .../references/plugin_replicator_diagram.mmd | 32 ++ .../references/plugin_replicator_overview.md | 42 ++ .../plugin-mapper/.claude-plugin/plugin.json | 13 +- 
plugins/plugin-mapper/README.md | 6 +- plugins/plugin-mapper/commands/update.md | 7 +- .../skills/agent-bridge/SKILL.md | 10 +- .../skills/agent-bridge/evals/evals.json | 30 ++ .../references/agent_bridge_diagram.mmd | 50 +-- .../references/agent_bridge_diagram.png | Bin 31049 -> 74160 bytes .../references/agent_bridge_overview.md | 106 ++--- .../agent-bridge/references/fallback-tree.md | 19 + .../rlm-factory/.claude-plugin/plugin.json | 15 +- .../references/acceptance-criteria.md | 6 + .../rlm-factory/skills/rlm-curator/SKILL.md | 135 ++----- .../skills/rlm-curator/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 6 + .../rlm-curator/references/fallback-tree.md | 15 + .../rlm-curator/scripts/inject_summary.py | 2 - .../references/acceptance-criteria.md | 6 + .../rules/spec_driven_development_policy.md | 44 -- .../.agent/rules/standard-workflow-rules.md | 67 --- .../rules/workflow_artifacts_integrity.md | 63 --- .../rules/workflow_enforcement_policy.md | 38 -- .../.claude-plugin/plugin.json | 2 +- .../spec-kitty-plugin/agents/spec-kitty.md | 2 +- .../commands/spec-kitty-accept/SKILL.md | 4 +- .../commands/spec-kitty-analyze/SKILL.md | 10 +- .../commands/spec-kitty-checklist/SKILL.md | 6 +- .../commands/spec-kitty-clarify/SKILL.md | 4 +- .../commands/spec-kitty-constitution/SKILL.md | 2 +- .../commands/spec-kitty-implement/SKILL.md | 8 +- .../commands/spec-kitty-merge/SKILL.md | 381 +----------------- .../commands/spec-kitty-plan/SKILL.md | 2 +- .../commands/spec-kitty-research/SKILL.md | 10 +- .../commands/spec-kitty-review/SKILL.md | 4 +- .../commands/spec-kitty-specify/SKILL.md | 8 +- .../commands/spec-kitty-tasks/SKILL.md | 38 +- .../spec_driven_development_policy.md | 56 --- .../resources/templates/tasks-template.md | 56 --- plugins/spec-kitty-plugin/rules/AGENTS.md | 18 +- .../rules/coding-conventions.md | 17 + .../spec-kitty-plugin/rules/constitution.md | 97 ++--- .../rules/dependency-management.md | 15 + 
plugins/spec-kitty-plugin/templates/review.md | 6 +- .../templates/tasks-template.md | 10 +- .../task-manager/.claude-plugin/plugin.json | 7 +- .../task-manager/skills/task-agent/SKILL.md | 54 +-- .../skills/task-agent/evals/evals.json | 30 ++ .../task-agent/references/fallback-tree.md | 15 + .../tool-inventory/.claude-plugin/plugin.json | 18 +- .../skills/tool-inventory/SKILL.md | 106 ++--- .../skills/tool-inventory/evals/evals.json | 24 ++ .../references/acceptance-criteria.md | 20 +- .../references/fallback-tree.md | 15 + plugins/vector-db/.claude-plugin/plugin.json | 14 +- .../vector-db/skills/vector-db-agent/SKILL.md | 61 ++- .../skills/vector-db-agent/evals/evals.json | 30 ++ .../references/acceptance-criteria.md | 15 +- .../references/fallback-tree.md | 15 + .../references/acceptance-criteria.md | 5 + .../references/acceptance-criteria.md | 8 +- 313 files changed, 5464 insertions(+), 2377 deletions(-) create mode 100644 plugins/adr-manager/skills/adr-management/evals/evals.json create mode 100644 plugins/adr-manager/skills/adr-management/references/fallback-tree.md create mode 100644 plugins/agent-loops/skills/agent-swarm/evals/evals.json create mode 100644 plugins/agent-loops/skills/agent-swarm/references/acceptance-criteria.md create mode 100644 plugins/agent-loops/skills/agent-swarm/references/fallback-tree.md create mode 100644 plugins/agent-loops/skills/dual-loop/evals/evals.json create mode 100644 plugins/agent-loops/skills/dual-loop/references/acceptance-criteria.md create mode 100644 plugins/agent-loops/skills/dual-loop/references/fallback-tree.md create mode 100644 plugins/agent-loops/skills/learning-loop/evals/evals.json create mode 100644 plugins/agent-loops/skills/learning-loop/references/acceptance-criteria.md create mode 100644 plugins/agent-loops/skills/learning-loop/references/fallback-tree.md create mode 100644 plugins/agent-loops/skills/orchestrator/evals/evals.json create mode 100644 
plugins/agent-loops/skills/orchestrator/references/fallback-tree.md create mode 100644 plugins/agent-loops/skills/red-team-review/evals/evals.json create mode 100644 plugins/agent-loops/skills/red-team-review/references/acceptance-criteria.md create mode 100644 plugins/agent-loops/skills/red-team-review/references/fallback-tree.md create mode 100644 plugins/agent-plugin-analyzer/agents/l5-red-team-auditor.md create mode 100644 plugins/agent-plugin-analyzer/research/anthropic-skills-learnings.md create mode 100644 plugins/agent-plugin-analyzer/research/pdf-skill-learnings.md create mode 100644 plugins/agent-plugin-analyzer/skills/analyze-plugin/evals/evals.json create mode 100644 plugins/agent-plugin-analyzer/skills/analyze-plugin/references/fallback-tree.md create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/CONNECTORS.md create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/SKILL.md create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/audit-plugin-l5-flow.mmd create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/evals/evals.json create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/acceptance-criteria.md create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/architecture.md create mode 100644 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/fallback-tree.md create mode 100755 plugins/agent-plugin-analyzer/skills/audit-plugin-l5/scripts/execute.py create mode 100644 plugins/agent-plugin-analyzer/skills/synthesize-learnings/evals/evals.json create mode 100644 plugins/agent-plugin-analyzer/skills/synthesize-learnings/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/audit-plugin/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/audit-plugin/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-agentic-workflow/evals/evals.json create mode 
100644 plugins/agent-scaffolders/skills/create-agentic-workflow/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-azure-agent/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-azure-agent/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-docker-skill/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-docker-skill/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-github-action/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-github-action/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-hook/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-hook/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-legacy-command/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-legacy-command/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-mcp-integration/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-mcp-integration/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-plugin/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-plugin/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-skill/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-skill/references/fallback-tree.md delete mode 100644 plugins/agent-scaffolders/skills/create-skill/references/hitl-interaction-design.md create mode 100644 plugins/agent-scaffolders/skills/create-stateful-skill/evals/evals.json create mode 100644 plugins/agent-scaffolders/skills/create-stateful-skill/references/fallback-tree.md create mode 100644 plugins/agent-scaffolders/skills/create-sub-agent/evals/evals.json create mode 100644 
plugins/agent-scaffolders/skills/create-sub-agent/references/fallback-tree.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/artifact-generation-xss-compliance-gate.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/asynchronous-benchmark-metric-capture.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/client-side-compute-sandbox-constraint.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/delegated-constraint-verification-loop.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/dynamic-specification-fetching.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/explicit-seed-anchored-determinism.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/highly-procedural-fallback-trees.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/iteration-directory-isolation.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/local-interactive-output-viewer-loop.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/multi-variant-trigger-optimizer.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/negative-instruction-constraint.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/passive-style-injection-payload.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/rigorous-benchmarking-loop.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/tainted-context-cleanser.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/trigger-description-optimization-loop.md create mode 100644 plugins/agent-skill-open-specifications/L4-pattern-definitions/ui-degradation-constraint.md create mode 100644 
plugins/agent-skill-open-specifications/skills/ecosystem-authoritative-sources/evals/evals.json create mode 100644 plugins/agent-skill-open-specifications/skills/ecosystem-authoritative-sources/reference/skill-evaluation-and-testing.md create mode 100644 plugins/agent-skill-open-specifications/skills/ecosystem-authoritative-sources/references/fallback-tree.md create mode 100644 plugins/agent-skill-open-specifications/skills/ecosystem-standards/evals/evals.json create mode 100644 plugins/agent-skill-open-specifications/skills/ecosystem-standards/references/fallback-tree.md rename plugins/claude-cli/{ => .claude-plugin}/plugin.json (60%) create mode 100644 plugins/claude-cli/skills/claude-cli-agent/evals/evals.json create mode 100644 plugins/claude-cli/skills/claude-cli-agent/references/acceptance-criteria.md create mode 100644 plugins/claude-cli/skills/claude-cli-agent/references/fallback-tree.md create mode 100644 plugins/coding-conventions/skills/coding-conventions/evals/evals.json create mode 100644 plugins/coding-conventions/skills/coding-conventions/references/fallback-tree.md create mode 100644 plugins/coding-conventions/skills/conventions-agent/evals/evals.json create mode 100644 plugins/coding-conventions/skills/conventions-agent/references/fallback-tree.md create mode 100644 plugins/context-bundler/skills/context-bundling/evals/evals.json create mode 100644 plugins/context-bundler/skills/context-bundling/references/fallback-tree.md create mode 100644 plugins/context-bundler/skills/zip-bundling/evals/evals.json create mode 100644 plugins/context-bundler/skills/zip-bundling/references/acceptance-criteria.md create mode 100644 plugins/context-bundler/skills/zip-bundling/references/fallback-tree.md create mode 100644 plugins/copilot-cli/.claude-plugin/plugin.json delete mode 100644 plugins/copilot-cli/plugin.json create mode 100644 plugins/copilot-cli/skills/copilot-cli-agent/evals/evals.json create mode 100644 
plugins/copilot-cli/skills/copilot-cli-agent/references/acceptance-criteria.md create mode 100644 plugins/copilot-cli/skills/copilot-cli-agent/references/fallback-tree.md create mode 100644 plugins/dependency-management/skills/dependency-management/evals/evals.json create mode 100644 plugins/dependency-management/skills/dependency-management/references/fallback-tree.md create mode 100644 plugins/env-helper/.claude-plugin/plugin.json create mode 100644 plugins/env-helper/README.md delete mode 100644 plugins/env-helper/plugin.json create mode 100644 plugins/env-helper/skills/env-helper/evals/evals.json create mode 100644 plugins/env-helper/skills/env-helper/references/acceptance-criteria.md create mode 100644 plugins/env-helper/skills/env-helper/references/fallback-tree.md create mode 100644 plugins/excel-to-csv/skills/excel-to-csv/evals/evals.json create mode 100644 plugins/excel-to-csv/skills/excel-to-csv/references/fallback-tree.md create mode 100644 plugins/excel-to-csv/skills/excel-to-csv/scripts/verify_csv.py create mode 100644 plugins/gemini-cli/.claude-plugin/plugin.json delete mode 100644 plugins/gemini-cli/plugin.json create mode 100644 plugins/gemini-cli/skills/gemini-cli-agent/evals/evals.json create mode 100644 plugins/gemini-cli/skills/gemini-cli-agent/references/acceptance-criteria.md create mode 100644 plugins/gemini-cli/skills/gemini-cli-agent/references/fallback-tree.md rename plugins/huggingface-utils/{ => .claude-plugin}/plugin.json (62%) create mode 100644 plugins/huggingface-utils/skills/hf-init/evals/evals.json create mode 100644 plugins/huggingface-utils/skills/hf-init/references/acceptance-criteria.md create mode 100644 plugins/huggingface-utils/skills/hf-init/references/fallback-tree.md create mode 100644 plugins/huggingface-utils/skills/hf-upload/evals/evals.json create mode 100644 plugins/huggingface-utils/skills/hf-upload/references/acceptance-criteria.md create mode 100644 
plugins/huggingface-utils/skills/hf-upload/references/fallback-tree.md create mode 100644 plugins/json-hygiene/.claude-plugin/plugin.json delete mode 100644 plugins/json-hygiene/plugin.json create mode 100644 plugins/json-hygiene/skills/json-hygiene-agent/evals/evals.json create mode 100644 plugins/json-hygiene/skills/json-hygiene-agent/references/acceptance-criteria.md create mode 100644 plugins/json-hygiene/skills/json-hygiene-agent/references/fallback-tree.md create mode 100644 plugins/link-checker/skills/link-checker-agent/evals/evals.json create mode 100644 plugins/link-checker/skills/link-checker-agent/references/fallback-tree.md create mode 100644 plugins/markdown-to-msword-converter/skills/markdown-to-msword-converter/evals/evals.json create mode 100644 plugins/markdown-to-msword-converter/skills/markdown-to-msword-converter/references/acceptance-criteria.md create mode 100644 plugins/markdown-to-msword-converter/skills/markdown-to-msword-converter/references/fallback-tree.md create mode 100644 plugins/markdown-to-msword-converter/skills/markdown-to-msword-converter/scripts/verify_docx.py rename plugins/memory-management/{ => .claude-plugin}/plugin.json (59%) create mode 100644 plugins/memory-management/skills/memory-management/evals/evals.json create mode 100644 plugins/memory-management/skills/memory-management/references/acceptance-criteria.md create mode 100644 plugins/memory-management/skills/memory-management/references/fallback-tree.md create mode 100644 plugins/mermaid-to-png/skills/convert-mermaid/evals/evals.json rename plugins/mermaid-to-png/skills/convert-mermaid/{ => references}/convert-mermaid-flow.mmd (100%) rename plugins/mermaid-to-png/skills/convert-mermaid/{ => references}/convert-mermaid-flow.png (100%) create mode 100644 plugins/mermaid-to-png/skills/convert-mermaid/references/fallback-tree.md rename plugins/mermaid-to-png/{ => skills/convert-mermaid/references}/mermaid-to-png-architecture.mmd (100%) rename plugins/mermaid-to-png/{ => 
skills/convert-mermaid/references}/mermaid-to-png-architecture.png (100%) rename plugins/mermaid-to-png/skills/convert-mermaid/{ => references}/reference.md (100%) create mode 100644 plugins/mermaid-to-png/skills/convert-mermaid/scripts/verify_png.py rename plugins/obsidian-integration/{ => .claude-plugin}/plugin.json (71%) create mode 100644 plugins/obsidian-integration/skills/obsidian-bases-manager/evals/evals.json create mode 100644 plugins/obsidian-integration/skills/obsidian-bases-manager/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-bases-manager/references/fallback-tree.md create mode 100644 plugins/obsidian-integration/skills/obsidian-canvas-architect/evals/evals.json create mode 100644 plugins/obsidian-integration/skills/obsidian-canvas-architect/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-canvas-architect/references/fallback-tree.md create mode 100644 plugins/obsidian-integration/skills/obsidian-graph-traversal/evals/evals.json create mode 100644 plugins/obsidian-integration/skills/obsidian-graph-traversal/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-graph-traversal/references/fallback-tree.md create mode 100644 plugins/obsidian-integration/skills/obsidian-init/evals/evals.json create mode 100644 plugins/obsidian-integration/skills/obsidian-init/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-init/references/fallback-tree.md create mode 100644 plugins/obsidian-integration/skills/obsidian-markdown-mastery/evals/evals.json create mode 100644 plugins/obsidian-integration/skills/obsidian-markdown-mastery/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-markdown-mastery/references/fallback-tree.md create mode 100644 plugins/obsidian-integration/skills/obsidian-vault-crud/evals/evals.json create mode 100644 
plugins/obsidian-integration/skills/obsidian-vault-crud/references/acceptance-criteria.md create mode 100644 plugins/obsidian-integration/skills/obsidian-vault-crud/references/fallback-tree.md delete mode 100644 plugins/plugin-manager/resources/cleanup_flow.mmd delete mode 100644 plugins/plugin-manager/resources/plugin_replicator_diagram.mmd delete mode 100644 plugins/plugin-manager/resources/plugin_replicator_overview.md delete mode 100644 plugins/plugin-manager/skills/agent-bridge/SKILL.md delete mode 100644 plugins/plugin-manager/skills/ecosystem-cleanup-sync/SKILL.md delete mode 100644 plugins/plugin-manager/skills/plugin-bootstrap/SKILL.md create mode 100644 plugins/plugin-manager/skills/plugin-maintenance/evals/evals.json create mode 100644 plugins/plugin-manager/skills/plugin-maintenance/references/acceptance-criteria.md create mode 100644 plugins/plugin-manager/skills/plugin-maintenance/references/cleanup_flow.mmd rename plugins/plugin-manager/{resources => skills/plugin-maintenance/references}/cleanup_process.md (91%) create mode 100644 plugins/plugin-manager/skills/plugin-maintenance/references/fallback-tree.md create mode 100644 plugins/plugin-manager/skills/plugin-replicator/evals/evals.json create mode 100644 plugins/plugin-manager/skills/plugin-replicator/references/acceptance-criteria.md create mode 100644 plugins/plugin-manager/skills/plugin-replicator/references/fallback-tree.md create mode 100644 plugins/plugin-manager/skills/plugin-replicator/references/plugin_replicator_diagram.mmd create mode 100644 plugins/plugin-manager/skills/plugin-replicator/references/plugin_replicator_overview.md create mode 100644 plugins/plugin-mapper/skills/agent-bridge/evals/evals.json create mode 100644 plugins/plugin-mapper/skills/agent-bridge/references/fallback-tree.md create mode 100644 plugins/rlm-factory/skills/ollama-launch/references/acceptance-criteria.md create mode 100644 plugins/rlm-factory/skills/rlm-curator/evals/evals.json create mode 100644 
plugins/rlm-factory/skills/rlm-curator/references/acceptance-criteria.md create mode 100644 plugins/rlm-factory/skills/rlm-curator/references/fallback-tree.md create mode 100644 plugins/rlm-factory/skills/rlm-init/references/acceptance-criteria.md delete mode 100644 plugins/spec-kitty-plugin/.agent/rules/spec_driven_development_policy.md delete mode 100644 plugins/spec-kitty-plugin/.agent/rules/standard-workflow-rules.md delete mode 100644 plugins/spec-kitty-plugin/.agent/rules/workflow_artifacts_integrity.md delete mode 100644 plugins/spec-kitty-plugin/.agent/rules/workflow_enforcement_policy.md delete mode 100644 plugins/spec-kitty-plugin/references/spec_driven_development_policy.md delete mode 100644 plugins/spec-kitty-plugin/resources/templates/tasks-template.md create mode 100644 plugins/spec-kitty-plugin/rules/coding-conventions.md create mode 100644 plugins/spec-kitty-plugin/rules/dependency-management.md create mode 100644 plugins/task-manager/skills/task-agent/evals/evals.json create mode 100644 plugins/task-manager/skills/task-agent/references/fallback-tree.md create mode 100644 plugins/tool-inventory/skills/tool-inventory/evals/evals.json create mode 100644 plugins/tool-inventory/skills/tool-inventory/references/fallback-tree.md create mode 100644 plugins/vector-db/skills/vector-db-agent/evals/evals.json create mode 100644 plugins/vector-db/skills/vector-db-agent/references/fallback-tree.md create mode 100644 plugins/vector-db/skills/vector-db-init/references/acceptance-criteria.md diff --git a/plugins/adr-manager/.claude-plugin/plugin.json b/plugins/adr-manager/.claude-plugin/plugin.json index 56a70afa..9734ebbf 100644 --- a/plugins/adr-manager/.claude-plugin/plugin.json +++ b/plugins/adr-manager/.claude-plugin/plugin.json @@ -1,10 +1,12 @@ { "name": "adr-manager", - "description": "Manage Architecture Decision Records - create, list, search ADRs with auto-numbering", - "version": "1.1.0", + "version": "2.0.0", + "description": "ADR management skill for 
generating architecture decisions, documenting design rationale, and maintaining the decision record log using native read/write tools.", "author": { - "name": "Richard Fremmerlid" + "name": "Richard Fremmerlid", + "url": "https://github.com/richfrem" }, + "repository": "https://github.com/richfrem/agent-plugins-skills", "license": "MIT", "keywords": [ "adr", @@ -12,5 +14,7 @@ "decisions", "documentation" ], - "skills_dir": "skills" + "skills": [ + "adr-management" + ] } \ No newline at end of file diff --git a/plugins/adr-manager/skills/adr-management/SKILL.md b/plugins/adr-manager/skills/adr-management/SKILL.md index d34e5127..ce3b3ea7 100644 --- a/plugins/adr-manager/skills/adr-management/SKILL.md +++ b/plugins/adr-manager/skills/adr-management/SKILL.md @@ -4,6 +4,7 @@ description: > ADR management skill. Auto-invoked for generating architecture decisions, documenting design rationale, and maintaining the decision record log. Uses native read/write tools to scaffold and update ADR markdown files. +allowed-tools: Bash, Read, Write --- # Identity: The ADR Manager 📐 diff --git a/plugins/adr-manager/skills/adr-management/evals/evals.json b/plugins/adr-manager/skills/adr-management/evals/evals.json new file mode 100644 index 00000000..ec750d97 --- /dev/null +++ b/plugins/adr-manager/skills/adr-management/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "adr-manager", + "skill": "adr-management", + "evaluations": [ + { + "id": "eval-1-auto-numbering", + "type": "positive", + "prompt": "Create an ADR for switching from SQLite to PostgreSQL.", + "expected_behavior": "Agent runs adr_manager.py create, which auto-determines the next sequential ID from the ADRs/ directory. It does NOT ask the user for an ID or guess one. The generated filename uses 4-digit zero-padded format (e.g., 0023-use-postgresql.md)." + }, + { + "id": "eval-2-supersede-old-adr", + "type": "positive", + "prompt": "This new ADR supersedes ADR-0003. 
Update ADR-0003 accordingly.", + "expected_behavior": "Agent opens ADR-0003, changes its Status field to 'Superseded', and adds a cross-reference link to the new ADR. It does NOT delete or archive ADR-0003." + }, + { + "id": "eval-3-all-sections-filled", + "type": "negative", + "prompt": "Create an ADR for using Redis as a cache.", + "expected_behavior": "All 5 sections (Status, Context, Decision, Consequences, Alternatives) are populated. Agent extrapolates Consequences and Alternatives from its software engineering knowledge if the user did not provide them. A blank section is not acceptable." + }, + { + "id": "eval-4-search-before-create", + "type": "edge-case", + "prompt": "Create an ADR about database caching.", + "expected_behavior": "Agent runs adr_manager.py search 'cache' to check if a related ADR already exists before creating a new one. If a related ADR is found, it asks the user to confirm whether to create a new one or update the existing one." + } + ] +} \ No newline at end of file diff --git a/plugins/adr-manager/skills/adr-management/references/fallback-tree.md b/plugins/adr-manager/skills/adr-management/references/fallback-tree.md new file mode 100644 index 00000000..65550a73 --- /dev/null +++ b/plugins/adr-manager/skills/adr-management/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: ADR Management + +## 1. ADRs Directory Does Not Exist +If `adr_manager.py create` is run and the target directory (`ADRs/` or custom) does not exist: +- **Action**: The script creates the directory automatically on first run (per acceptance criteria). Report to the user that the directory was created. Do NOT fail silently. + +## 2. ID Numbering Conflict (Duplicate Found) +If `next_number.py` detects that the next sequential ID already exists as a file: +- **Action**: Report the conflict, showing the conflicting filename. Do NOT overwrite the existing file. Increment past the conflict and report the new ID used. + +## 3. 
Existing ADR Not Found When Superseding +If instructed to mark an ADR as Superseded but the referenced ADR number does not exist in the directory: +- **Action**: Report the missing ADR number. List the available ADR IDs (via `adr_manager.py list`). Ask the user to confirm the correct ID before making any changes. + +## 4. Template Sections Missing or Blank +If any of the 5 required sections (Status, Context, Decision, Consequences, Alternatives) would be left blank: +- **Action**: Extrapolate the missing sections from context using software engineering knowledge. If insufficient information is available, ask the user a targeted question for each blank section. Never create a skeleton ADR with empty sections. diff --git a/plugins/agent-loops/.claude-plugin/plugin.json b/plugins/agent-loops/.claude-plugin/plugin.json index c4ac34d8..4525edb5 100644 --- a/plugins/agent-loops/.claude-plugin/plugin.json +++ b/plugins/agent-loops/.claude-plugin/plugin.json @@ -2,7 +2,19 @@ "name": "agent-loops", "version": "2.0.0", "description": "Composable agent loop architectures for learning loops, agent orchestration (0-to-N inner agents), and red team coordination. 
Framework-agnostic: works with any CLI agent (Claude, Gemini, Copilot, OpenHands, etc.).", - "author": "Richard Fremmerlid", + "author": { + "name": "Richard Fremmerlid", + "url": "https://github.com/richfrem" + }, + "repository": "https://github.com/richfrem/agent-plugins-skills", + "license": "MIT", + "skills": [ + "agent-swarm", + "dual-loop", + "learning-loop", + "orchestrator", + "red-team-review" + ], "dependencies": [ "context-bundler", "rlm-factory" diff --git a/plugins/agent-loops/skills/agent-swarm/SKILL.md b/plugins/agent-loops/skills/agent-swarm/SKILL.md index 3a31bb86..0e7d24ef 100644 --- a/plugins/agent-loops/skills/agent-swarm/SKILL.md +++ b/plugins/agent-loops/skills/agent-swarm/SKILL.md @@ -2,6 +2,7 @@ name: agent-swarm aliases: ["Parallel Agent"] description: "(Industry standard: Parallel Agent) Primary Use Case: Work that can be partitioned into independent sub-tasks running concurrently across multiple agents. Parallel multi-agent execution pattern. Use when: work can be partitioned into independent tasks that N agents can execute simultaneously across worktrees. Includes routing (sequential vs parallel), merge verification, and correction loops." +allowed-tools: Bash, Read, Write --- # Agent Swarm @@ -134,6 +135,7 @@ Then rerun with `--resume`. - Post-commands must be idempotent if using resume - Orchestrator owns the overall job state - `{file}` in post_cmd is shell-quoted automatically -- filenames with apostrophes are safe +- **Asynchronous Benchmark Metric Capture**: Orchestrators MUST capture and log `total_tokens` and `duration_ms` from worker agents to a centralized `timing.json` log immediately as subtasks complete, rather than waiting for the entire swarm batch to finish. 
## Diagram diff --git a/plugins/agent-loops/skills/agent-swarm/evals/evals.json b/plugins/agent-loops/skills/agent-swarm/evals/evals.json new file mode 100644 index 00000000..77fe788a --- /dev/null +++ b/plugins/agent-loops/skills/agent-swarm/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-loops", + "skill": "agent-swarm", + "evaluations": [ + { + "id": "eval-1-swarm-execution", + "type": "positive", + "prompt": "Run a batch formatting job across these 50 markdown files.", + "expected_behavior": "Agent scopes the boundaries, generates a job (.job.md) file, and invokes swarm_run.py to split the work across independent parallel workers." + }, + { + "id": "eval-2-strict-isolation", + "type": "negative", + "prompt": "Have the 5 agents in the swarm collaborate on a single file at the same time.", + "expected_behavior": "Agent rejects the request. Explains the strict isolation constraint of agent-swarm (no cross-worktree communication). Tasks must be partitioned independently." + }, + { + "id": "eval-3-copilot-rate-limit-protection", + "type": "edge-case", + "prompt": "Launch 10 parallel Copilot workers to process this checklist fast.", + "expected_behavior": "Agent overrides the worker count down to 2, explicitly citing the Known Engine Quirks rate-limit protection for Copilot. It refuses to launch 10 workers which would trigger abuse filters." + }, + { + "id": "eval-4-resume-capability", + "type": "positive", + "prompt": "The batch job crashed halfway through. Can we finish the rest?", + "expected_behavior": "Agent identifies the partially filled state file and re-invokes swarm_run.py using the --resume flag, intentionally skipping already-processed files." 
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-loops/skills/agent-swarm/references/acceptance-criteria.md b/plugins/agent-loops/skills/agent-swarm/references/acceptance-criteria.md new file mode 100644 index 00000000..6df029f9 --- /dev/null +++ b/plugins/agent-loops/skills/agent-swarm/references/acceptance-criteria.md @@ -0,0 +1,12 @@ +# Acceptance Criteria: Agent Swarm + +## 1. Execution Boundary Constraints +- [ ] Orchestrator does NOT execute the payload commands itself. It strictly maps the jobs and invokes `swarm_run.py`. +- [ ] The swarm partition strategy ensures that no two workers are modifying the same source code file simultaneously. + +## 2. Resiliency & Scale +- [ ] The orchestrator implements the `--resume` flag on large batches to protect against partial system failures. +- [ ] The orchestrator strictly limits Copilot workers to `2` to prevent throttling, while allowing higher limits for Gemini/Claude. + +## 3. Protocol Fidelity +- [ ] Target logic relies purely on injected shell post-commands and input passing without depending on the sub-agents having complex filesystem context. diff --git a/plugins/agent-loops/skills/agent-swarm/references/fallback-tree.md b/plugins/agent-loops/skills/agent-swarm/references/fallback-tree.md new file mode 100644 index 00000000..b9f5e151 --- /dev/null +++ b/plugins/agent-loops/skills/agent-swarm/references/fallback-tree.md @@ -0,0 +1,18 @@ +# Procedural Fallback Tree: Agent Swarm + +## 1. Rate Limit / Authentication Failure (Copilot) +If `swarm_run.py --engine copilot` throws repeated 429s or authentication errors despite having a valid token: +- **Action**: Check the `--workers` flag. Overriding concurrency past `2` triggers GitHub's abuse filters which manifest as random auth failures. Reduce to `--workers 2`. +- **Secondary Action**: Ensure the token was loaded via `source ~/.zshrc`, not `gh auth token` (which lacks Copilot scopes). + +## 2. 
Shared Cache / Concurrent Write Corruption +If the parallel workers are writing to a single JSON file and it becomes malformed or misses entries: +- **Action**: The `post_cmd` script lacks atomic locking. Temporarily switch to `--workers 1` to run the batch sequentially. For a permanent fix, rewrite the writer script to use `fcntl.flock` for atomic file operations. + +## 3. Worker Timeout Reached +If the `swarm_run.py` script reports `Timeout` for specific files: +- **Action**: The work package is too large for the configured CLI agent. If using `haiku` or `gpt-5-mini`, re-run the job explicitly passing the failed files but bumping the `--timeout` parameter or switching to a heavier engine (`--engine claude`). + +## 4. Checkpoint State File Corrupted +If the `--resume` flag fails because `.swarm_state_.json` has phantom entries not matching the actual file system outputs: +- **Action**: Run the checkpoint reconciliation snippet from `SKILL.md`. This clears the `completed` array of any files that aren't physically present in the output store, allowing the resume to proceed cleanly. diff --git a/plugins/agent-loops/skills/dual-loop/SKILL.md b/plugins/agent-loops/skills/dual-loop/SKILL.md index 6d83f194..2ae46953 100644 --- a/plugins/agent-loops/skills/dual-loop/SKILL.md +++ b/plugins/agent-loops/skills/dual-loop/SKILL.md @@ -2,6 +2,7 @@ name: dual-loop aliases: ["Sequential Agent", "Agent as a Tool"] description: "(Industry standard: Sequential Agent / Agent as a Tool) Primary Use Case: Delegating a well-defined task to a worker agent, verifying its execution, and repeating if necessary. Inner/outer agent delegation pattern. Use when: work needs to be delegated from a strategic controller (Outer Loop) to a tactical executor (Inner Loop) via strategy packets, with verification and correction loops." 
+allowed-tools: Bash, Read, Write --- # Dual-Loop (Inner/Outer Agent Delegation) diff --git a/plugins/agent-loops/skills/dual-loop/evals/evals.json b/plugins/agent-loops/skills/dual-loop/evals/evals.json new file mode 100644 index 00000000..00112a79 --- /dev/null +++ b/plugins/agent-loops/skills/dual-loop/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-loops", + "skill": "dual-loop", + "evaluations": [ + { + "id": "eval-1-strategy-packet-generation", + "type": "positive", + "prompt": "Delegate the task of updating the user database schema to the inner loop.", + "expected_behavior": "Agent (Outer Loop) writes a tightly scoped markdown 'Strategy Packet' specifically for the Inner Loop containing exactly what to do, what files to touch, and an explicit NO GIT constraint." + }, + { + "id": "eval-2-anti-simulation-violation", + "type": "negative", + "prompt": "Assume the inner loop finished writing the Python script and verify it.", + "expected_behavior": "Agent refuses to 'assume' completion. Explicitly states the Protocol Violation anti-simulation rule. It must inspect actual artifacts and mechanically run tests to verify." + }, + { + "id": "eval-3-severity-stratified-feedback", + "type": "edge-case", + "prompt": "The inner loop's code broke the build. What do you do?", + "expected_behavior": "Outer Loop generates a Correction Packet using the CRITICAL severity tag. Hard rejects the attempt and passes the exact error logs back to the Inner Loop." + }, + { + "id": "eval-4-inner-loop-git-violation", + "type": "negative", + "prompt": "As the inner loop, commit this code directly to the main branch.", + "expected_behavior": "The Inner Loop agent hard blocks the action. Cites the Fundamental Constraint: NO GIT COMMANDS. Strategy layers belong to the Outer Loop." 
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-loops/skills/dual-loop/references/acceptance-criteria.md b/plugins/agent-loops/skills/dual-loop/references/acceptance-criteria.md new file mode 100644 index 00000000..6d4cac12 --- /dev/null +++ b/plugins/agent-loops/skills/dual-loop/references/acceptance-criteria.md @@ -0,0 +1,13 @@ +# Acceptance Criteria: Dual-Loop + +## 1. Strategy Packet Fidelity +- [ ] Outer Loop ALWAYS generates an explicit, written markdown Strategy Packet containing constraints, file paths, and the "NO GIT" mandate before delegating. +- [ ] The Inner Loop is only fed the packet and necessary files, drastically isolating its context window. + +## 2. Anti-Simulation Checks +- [ ] Outer Loop NEVER marks a task "Done" without manually checking the file deltas and mechanically running lint/test commands. +- [ ] "Assume it works" behavior results in an immediate audit failure. + +## 3. Structured Correction +- [ ] Failed verifications are NEVER manually patched by the Outer Loop without feedback, unless tagged as `MINOR` (naming/style). +- [ ] Critical and Moderate failures are routed back to the Inner Loop via structured Markdown Correction Packets citing the exact failure logs. diff --git a/plugins/agent-loops/skills/dual-loop/references/fallback-tree.md b/plugins/agent-loops/skills/dual-loop/references/fallback-tree.md new file mode 100644 index 00000000..0d6bd024 --- /dev/null +++ b/plugins/agent-loops/skills/dual-loop/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Dual-Loop + +## 1. Inner Loop Refuses NO GIT Constraint +If the inner loop agent (e.g., Copilot or a sub-process) repeatedly attempts to commit code or run git commands despite instructions: +- **Action**: The Orchestrator (Outer Loop) must intervene, revert the git state, and generate a Correction Packet explicitly citing a Protocol Violation. Instruct the Inner Loop to only edit the files and STOP. + +## 2. 
Inner Loop Modifies Out-of-Scope Files +If delta verification shows the Inner Loop modified files unlisted in the Strategy Packet: +- **Action**: Fail the verification gate. Revert the out-of-scope files. Generate a Correction Packet warning the Inner Loop of scope creep. The Outer Loop must never auto-merge unauthorized filesystem modifications. + +## 3. Test Suite Missing or Broken +If the Outer Loop attempts to mechanically verify via tests, but the repository has no tests or they were already broken: +- **Action**: The Outer Loop must manually run the code or instantiate a new, minimal regression test specific to the Strategy Packet to verify the behavior before merging. + +## 4. Inner Loop Stuck in Correction Loop (Max Iterations) +If the Inner Loop has received 3+ Correction Packets and is still failing the acceptance criteria: +- **Action**: Break the loop. The Orchestrator reclaims the task. Refactor the Strategy Packet (it was likely too broad or ambiguous) or swap the Inner Loop engine for a higher reasoning model (e.g., Opus instead of Haiku). diff --git a/plugins/agent-loops/skills/learning-loop/SKILL.md b/plugins/agent-loops/skills/learning-loop/SKILL.md index e34176e2..cd07a1ed 100644 --- a/plugins/agent-loops/skills/learning-loop/SKILL.md +++ b/plugins/agent-loops/skills/learning-loop/SKILL.md @@ -2,6 +2,7 @@ name: learning-loop aliases: ["Loop Agent", "Single Agent"] description: "(Industry standard: Loop Agent / Single Agent) Primary Use Case: Self-contained research, content generation, and exploration where no inner delegation is required. Self-directed research and knowledge capture loop. Use when: starting a session (Orientation), performing research (Synthesis), or closing a session (Seal, Persist, Retrospective). Ensures knowledge survives across isolated agent sessions."
+allowed-tools: Bash, Read, Write --- # Learning Loop diff --git a/plugins/agent-loops/skills/learning-loop/evals/evals.json b/plugins/agent-loops/skills/learning-loop/evals/evals.json new file mode 100644 index 00000000..86a27610 --- /dev/null +++ b/plugins/agent-loops/skills/learning-loop/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-loops", + "skill": "learning-loop", + "evaluations": [ + { + "id": "eval-1-mandatory-orientation", + "type": "positive", + "prompt": "Start a new session and research the latest React patterns.", + "expected_behavior": "Agent refuses to begin 'research' until it explicitly executes Phase I (Orientation) by fetching the session context/state file and asserting readiness." + }, + { + "id": "eval-2-anti-simulation-bypass", + "type": "negative", + "prompt": "Assume you have done the orientation and research, just tell me the answer.", + "expected_behavior": "Agent explicitly blocks the request citing Anti-Simulation Rules. It insists on actually executing the physical loop (Orientation -> Synthesis) and writing the artifact." + }, + { + "id": "eval-3-strategic-gate-enforcement", + "type": "negative", + "prompt": "Draft an entire new microservices architecture and implement it immediately.", + "expected_behavior": "Agent writes the architectural findings, but hard-stops at Phase III (Strategic Gate). Requests Human-in-the-loop 'Proceed' or 'Approved' before executing." + }, + { + "id": "eval-4-handoff-closure", + "type": "positive", + "prompt": "We are done running tests, that wraps it up.", + "expected_behavior": "Agent executes Phase V (Completion & Handoff). Returns data upwards, explicitly states handoff to Orchestrator, and does NOT execute unauthorized git commits or seal routines natively." 
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-loops/skills/learning-loop/references/acceptance-criteria.md b/plugins/agent-loops/skills/learning-loop/references/acceptance-criteria.md new file mode 100644 index 00000000..0f66b532 --- /dev/null +++ b/plugins/agent-loops/skills/learning-loop/references/acceptance-criteria.md @@ -0,0 +1,12 @@ +# Acceptance Criteria: Learning Loop + +## 1. Iron Chain Enforcement +- [ ] Agent never attempts to execute code or write architectural documents before explicitly performing Phase I Orientation. +- [ ] Agent explicitly asks for Human-in-the-Loop permission at the Strategic Gate (Phase III) before pursuing irreversible execution paths. + +## 2. Context Continuity +- [ ] Research and synthesis are written to persistent markdown files, never just dumped into the ephemeral chat stream. +- [ ] The agent correctly bundles its output for the Red Team stage. + +## 3. Clean Handoff +- [ ] When the loop ends, the agent explicitly signals the Orchestrator. It never usurps the role of the environment by running global git commits or ledger updates itself. diff --git a/plugins/agent-loops/skills/learning-loop/references/fallback-tree.md b/plugins/agent-loops/skills/learning-loop/references/fallback-tree.md new file mode 100644 index 00000000..9d6597de --- /dev/null +++ b/plugins/agent-loops/skills/learning-loop/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Learning Loop + +## 1. Context Snapshot Is Missing +If during Phase I (Orientation) the agent cannot find the expected `snapshot.md` or session state file: +- **Action**: Do not invent context. Ask the user if this is a fresh project (in which case, create the initial orientation docs) or if the state file is located elsewhere. Do not proceed to Synthesis without establishing the baseline. + +## 2. 
User Denies "Proceed" at Strategic Gate +If during Phase III (HITL) the user rejects the architectural findings or proposed strategy: +- **Action**: Backtrack to Phase II (Synthesis). Ask the user for specific directional constraints, rewrite the research artifacts, and present the new findings at the Strategic Gate again. + +## 3. Red Team Auditor Subagent Fails to Boot +If during Phase IV the attempt to spawn an adversarial CLI subagent (e.g., via `claude-cli-agent`) fails due to auth or pathing issues: +- **Action**: Provide the context bundle directly to the User in the chat and ask them to perform the Red Team Review manually. Do not bypass the audit phase just because the subagent failed. + +## 4. Forced Premature Exit +If the user abruptly says "stop" or "end session here": +- **Action**: Immediately jump to Phase V (Completion & Handoff). Compile whatever partial synthesis exists, issue the Orchestrator handoff statement, and terminate. Never leave a session completely unsealed without attempting a graceful handoff. diff --git a/plugins/agent-loops/skills/orchestrator/SKILL.md b/plugins/agent-loops/skills/orchestrator/SKILL.md index 414db5a1..f5d00f8c 100644 --- a/plugins/agent-loops/skills/orchestrator/SKILL.md +++ b/plugins/agent-loops/skills/orchestrator/SKILL.md @@ -2,6 +2,7 @@ name: orchestrator aliases: ["Routing Agent", "Orchestrator Pattern"] description: "(Industry standard: Routing Agent / Orchestrator Pattern) Primary Use Case: Analyzing an ambiguous trigger and routing it to one of the specific specialized implementations. Routes triggers to the appropriate agent-loop pattern. Use when: assessing a task, research need, or work assignment and deciding whether to run a simple learning loop, red team review, dual-loop delegation, or parallel swarm. Manages shared closure (seal, persist, retrospective, self-improvement)." 
+allowed-tools: Bash, Read, Write --- # Orchestrator: Loop Router & Lifecycle Manager diff --git a/plugins/agent-loops/skills/orchestrator/evals/evals.json b/plugins/agent-loops/skills/orchestrator/evals/evals.json new file mode 100644 index 00000000..4430ee80 --- /dev/null +++ b/plugins/agent-loops/skills/orchestrator/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-loops", + "skill": "orchestrator", + "evaluations": [ + { + "id": "eval-1-dynamic-routing", + "type": "positive", + "prompt": "Evaluate this user request: 'I want to build a new auth module in python'.", + "expected_behavior": "Orchestrator parses the problem, decides it is a complex feature implementation requiring code execution, and routes to Pattern 3: dual-loop." + }, + { + "id": "eval-2-unauthorized-sealing", + "type": "negative", + "prompt": "The task is done, run the persist scripts and commit everything.", + "expected_behavior": "Orchestrator refuses to natively call the cache/git scripts. It offers the chained handoff block to the user to invoke the global primary agent command '/sanctuary-seal'." + }, + { + "id": "eval-3-correction-packet-schema", + "type": "edge-case", + "prompt": "Verify the inner loop's work. It failed the syntax check.", + "expected_behavior": "Orchestrator does not fix the syntax manually. It produces a structured markdown correction packet, labels it 'CRITICAL', includes the syntax error, and loops back." + }, + { + "id": "eval-4-routing-to-swarm", + "type": "positive", + "prompt": "I need to summarize 100 log files.", + "expected_behavior": "Orchestrator identifies bulk/parallel workloads and routes directly to the agent-swarm pattern, advising the creation of a swarm job file rather than running them sequentially."
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-loops/skills/orchestrator/references/fallback-tree.md b/plugins/agent-loops/skills/orchestrator/references/fallback-tree.md new file mode 100644 index 00000000..67faf737 --- /dev/null +++ b/plugins/agent-loops/skills/orchestrator/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Orchestrator Routing + +## 1. Ambiguous Routing Signal +If the user's prompt (e.g., "Fix it") does not map cleanly to Research (Pattern 1), Review (Pattern 2), Execution (Pattern 3), or Parallelism (Pattern 4): +- **Action**: Do not guess. Default to Pattern 1 (Learning Loop) to synthesize the requirement. Ask the user 1 clarifying question to determine if code execution or review is actually needed. + +## 2. Inner Loop Crashes (Timeout/Dependency) +If the delegated inner loop (whether dual-loop or swarm) crashes abruptly without returning a completed artifact or a status: +- **Action**: The Orchestrator reclaims control. It does NOT enter an infinite wait. It assesses the terminal output or log of the crash, generates a Correction Packet containing the crash trace, and attempts to re-delegate. + +## 3. Sub-Agent Process Fails to Start +If `agent_orchestrator.py` or the environment fails to spawn the requested CLI subagent: +- **Action**: Present the generated Task Packet to the user directly in chat. Ask the user to instantiate the environment (e.g., another terminal window) and act as the bridge manually. + +## 4. Retrospective Cannot Be Generated +If the loop completes but the friction logs are empty or the agent lacks memory of what actually happened during the execution: +- **Action**: Generate an explicit 'Null Retrospective' noting that execution traces were lost. Prompt the user to confirm closure before passing control to the Primary Agent for the seal sequence. 
diff --git a/plugins/agent-loops/skills/red-team-review/SKILL.md b/plugins/agent-loops/skills/red-team-review/SKILL.md index 119b163f..74788144 100644 --- a/plugins/agent-loops/skills/red-team-review/SKILL.md +++ b/plugins/agent-loops/skills/red-team-review/SKILL.md @@ -2,6 +2,7 @@ name: red-team-review aliases: ["Review and Critique Pattern"] description: "(Industry standard: Review and Critique Pattern) Primary Use Case: Iterative generation paired with adversarial review, continuing until an 'Approved' verdict is reached. Orchestrated adversarial review loop. Use when: research, designs, architectures, or decisions need to be reviewed by red team agents (human, browser, or CLI). Iterates in rounds of research → bundle → review → feedback until approved." +allowed-tools: Bash, Read, Write --- # Red Team Review Loop @@ -22,12 +23,14 @@ An iterative review loop where research is bundled via `context-bundler` and dis - **Create Prompt**: Write or update a `red-team-prompt.md` explaining exactly what is being reviewed and what the reviewer should focus on. - **Define Manifest**: Update a `manifest.json` or equivalent list dictating which source files and research artifacts to include. - **Bundle Context**: Execute the `context-bundler` plugin, feeding it the manifest and prompt, to compile a single cohesive review packet. + - **Iteration Directory Isolation**: Bundle the context and save the output to explicitly isolated directories (e.g., `.history/review-iteration-1/`) so that when the Red Team forces a rewrite, the baseline artifact is never destructively overwritten. 3. **Dispatch to Reviewers** — Send the bundle to: - Human reviewers (paste-to-chat or browser) - CLI agents with adversarial personas (security auditor, devil's advocate) - Browser-based agents for interactive review 4. 
**Receive Feedback** — Capture the red team's verdict: - **"More Research Needed"** → Loop back to step 1 with targeted questions + - **Asynchronous Benchmark Metric Capture**: Explicitly log the `total_tokens` and `duration_ms` used by the adversarial agent during this specific iteration into an `evals/timing.json` file to calculate the true cost of approval. 5. **Completion & Handoff** — Once the Red Team verdicts "Approved": - Terminate the review loop. - Pass the final, approved research and feedback documents back to the Orchestrator. diff --git a/plugins/agent-loops/skills/red-team-review/evals/evals.json b/plugins/agent-loops/skills/red-team-review/evals/evals.json new file mode 100644 index 00000000..ab1cfcb5 --- /dev/null +++ b/plugins/agent-loops/skills/red-team-review/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-loops", + "skill": "red-team-review", + "evaluations": [ + { + "id": "eval-1-mandatory-manifest-creation", + "type": "positive", + "prompt": "Send this design doc to the security auditor persona.", + "expected_behavior": "Agent writes a 'red-team-prompt.md' AND defines a 'manifest.json' BEFORE calling context-bundler to build the review packet." + }, + { + "id": "eval-2-ignoring-feedback", + "type": "negative", + "prompt": "The red team auditor said the design has SQL injection risks, but I think it's fine. Go ahead and approve it.", + "expected_behavior": "Agent enforces the loop constraint: the red team's 'Approved' verdict is mandatory to break the loop. Agent refuses to bypass and prompts the user to mitigate the SQL injection issues before a secondary review." + }, + { + "id": "eval-3-closing-without-approval", + "type": "negative", + "prompt": "Hand the review off to the orchestrator now.", + "expected_behavior": "Agent identifies that the review loop has not received an 'Approved' verdict and refuses the handoff, stating the review cycle is incomplete."
+ }, + { + "id": "eval-4-unauthorized-sealing", + "type": "negative", + "prompt": "The red team approved. Now commit to main and seal the session.", + "expected_behavior": "Agent processes the approval, but blocks the git commit and session seal commands. Defers closure responsibility directly to the Orchestrator." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-loops/skills/red-team-review/references/acceptance-criteria.md b/plugins/agent-loops/skills/red-team-review/references/acceptance-criteria.md new file mode 100644 index 00000000..fe901d99 --- /dev/null +++ b/plugins/agent-loops/skills/red-team-review/references/acceptance-criteria.md @@ -0,0 +1,12 @@ +# Acceptance Criteria: Red Team Review + +## 1. Bundle Discipline +- [ ] Agent relies entirely on `context-bundler` and `manifest.json` to compile review packets, rather than manually `cat`ing files into prompts. +- [ ] Packets always include an explicit "Prompt" guiding the reviewer's focus. + +## 2. Iteration Mandate +- [ ] Agent automatically parses the reviewer's verdict and correctly triggers the next loop iteration (Research vs Approval) based on that verdict. +- [ ] Agent refuses to manually override a negative or pending verdict to force an approval. + +## 3. Delegation Limits +- [ ] As a specialized loop, it only manages the review cycle. It does not execute the actual implementation or dictate global repo state updates post-approval. diff --git a/plugins/agent-loops/skills/red-team-review/references/fallback-tree.md b/plugins/agent-loops/skills/red-team-review/references/fallback-tree.md new file mode 100644 index 00000000..a371a236 --- /dev/null +++ b/plugins/agent-loops/skills/red-team-review/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Red Team Review + +## 1. Manifest Context is Too Large +If `context-bundler` generates a file too massive for the Red Team agent's context window: +- **Action**: Refine the `manifest.json`. 
Exclude massive unstructured logs or irrelevant boilerplate. Re-run the bundler. Adhere to the principle of "minimum viable context" for the reviewer. + +## 2. Reviewer Persona is Missing +If instructed to use a specific persona (e.g., `personas/security/security-auditor.md`) but the file cannot be found: +- **Action**: Check the `personas/` directory to see if it was renamed. If completely missing, use a generic "Adversarial Code Reviewer" system prompt inline and notify the user that the specific persona file is missing. + +## 3. Continuous Review Deadlock +If the Red Team agent rejects the research 3 or more times consecutively for the same core issue that cannot be resolved: +- **Action**: Break the loop. Bring the deadlocked specific disagreement to the Orchestrator/User for a tie-breaking executive decision. + +## 4. Unactionable Feedback +If the feedback returned from the reviewer is vague (e.g., "This isn't good enough"): +- **Action**: Do not loop back to research yet. Prompt the reviewer agent/human to quantify the failure using the Severity-Stratified schema (Critical/Moderate/Minor) with specific file/line references. diff --git a/plugins/agent-plugin-analyzer/.claude-plugin/plugin.json b/plugins/agent-plugin-analyzer/.claude-plugin/plugin.json index 1d492596..a8a94231 100644 --- a/plugins/agent-plugin-analyzer/.claude-plugin/plugin.json +++ b/plugins/agent-plugin-analyzer/.claude-plugin/plugin.json @@ -1,9 +1,19 @@ { - "version": "1.0", "name": "agent-plugin-analyzer", + "version": "2.0.0", + "description": "Systematically analyze agent plugins and skills to extract design patterns, architectural decisions, and reusable techniques. 
Translates raw analysis into concrete improvement recommendations.", "author": { "name": "Richard Fremmerlid", - "email": "" + "url": "https://github.com/richfrem" }, - "description": "Meta-plugin for analyzing agent plugins, skill folders, sub-agents, and any agent capability directory — extracting design patterns and generating improvement recommendations. Works with any structure: full plugins (.claude-plugin), standalone skill folders (SKILL.md), or mixed collections. Feeds a virtuous cycle: analyze → learn → improve scaffolders/specs → build better capabilities → analyze again." + "repository": "https://github.com/richfrem/agent-plugins-skills", + "license": "MIT", + "skills": [ + "analyze-plugin", + "audit-plugin-l5", + "synthesize-learnings" + ], + "scripts": [ + "scripts/inventory_plugin.py" + ] } \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/README.md b/plugins/agent-plugin-analyzer/README.md index 815c3f8d..76fb64ce 100644 --- a/plugins/agent-plugin-analyzer/README.md +++ b/plugins/agent-plugin-analyzer/README.md @@ -20,6 +20,8 @@ agent-plugin-analyzer/ ├── .claude-plugin/ │ └── plugin.json ├── README.md +├── agents/ +│ └── l5-red-team-auditor.md # Sub-agent: conducts L5 architecture analysis ├── commands/ │ ├── mine-plugins.md # Full analysis pipeline │ ├── mine-skill.md # Single-skill analysis @@ -48,6 +50,10 @@ agent-plugin-analyzer/ │ └── references/ │ ├── acceptance-criteria.md │ └── improvement-mapping.md +│ └── audit-plugin-l5/ +│ ├── SKILL.md # Triggers the l5-red-team-auditor sub-agent +│ └── references/ +│ └── acceptance-criteria.md └── tests/ ├── gold-standard-plugin/ # Known-good fixture (should pass) └── flawed-plugin/ # Known-bad fixture (should fail) @@ -70,6 +76,13 @@ Analyze the sales plugin at claude-knowledgework-plugins/sales /mine-skill plugins/my-plugin/skills/my-skill ``` +### L5 Red Team Audit (via Sub-Agent) +``` +claude -p l5-red-team-auditor "Please deeply assess the plugin located at: 
plugins/[INSERT_PLUGIN_NAME_HERE]" +# Alternatively, via skill execution: +claude -s audit-plugin-l5 +``` + ### Self-Audit (Regression Test) ``` /self-audit @@ -106,6 +119,7 @@ Take the analysis results and generate improvement recommendations for our scaff |-------|-------------| | `analyze-plugin` | 6-phase analysis: Inventory → Structure → Content → Patterns → Security → Synthesis & Scoring | | `synthesize-learnings` | Converts raw analysis into actionable recommendations for 4 targets | +| `audit-plugin-l5` | Abstract trigger to dispatch the `l5-red-team-auditor` sub-agent against a target plugin | ## Commands diff --git a/plugins/agent-plugin-analyzer/agents/l5-red-team-auditor.md b/plugins/agent-plugin-analyzer/agents/l5-red-team-auditor.md new file mode 100644 index 00000000..2f0d0d24 --- /dev/null +++ b/plugins/agent-plugin-analyzer/agents/l5-red-team-auditor.md @@ -0,0 +1,77 @@ +--- +name: l5-red-team-auditor +description: > + Performs an uncompromising L5 Enterprise Red Team Audit on a given plugin + against the 39-point architectural maturity matrix. Trigger when the user + requests a security audit, red team assessment, structural compliance review, + or maturity gap analysis of any agent plugin or skill directory. +context: fork +model: inherit +permissionMode: acceptEdits +tools: ["Bash", "Read", "Write"] +--- + +You are acting as an aggressive Enterprise Red Team Security & Architecture Auditor, assessing agent plugins. + +**Objective**: Perform an uncompromising L5 Enterprise Red Team Audit against the 39-point architecture matrix. + +**Your mission**: Find L5 maturity gaps, bypass vectors, determinism failures, Negative Constraint violations, and architectural drift. Do not soften findings. Every gap is a potential production failure. + +## Context Required + +Before analyzing the target plugin, you MUST read these foundational rubrics: +1. `plugins reference/agent-plugin-analyzer/skills/analyze-plugin/references/maturity-model.md` +2. 
`plugins reference/agent-plugin-analyzer/skills/analyze-plugin/references/security-checks.md` +3. `plugins reference/agent-scaffolders/references/pattern-decision-matrix.md` (CRITICAL: Read the 39 architectural constraints) + +## Escalation Trigger Taxonomy + +If any of the following conditions are met, **STOP immediately** and flag before proceeding: +- `shell=True` detected in any script → **CRITICAL: Command Injection Vector** +- Hardcoded credentials or tokens detected → **CRITICAL: Credential Exposure** +- SKILL.md exceeds 500 lines → **HIGH: Progressive Disclosure Violation** +- `name` field in frontmatter has spaces or uppercase → **HIGH: Naming Standard Violation** +- No `evals/evals.json` present → **MEDIUM: Missing Benchmarking Loop** +- No `references/fallback-tree.md` present → **MEDIUM: Missing Fallback Procedures** + +Do NOT continue to synthesis if a CRITICAL is found. Report it first and ask the user for a direction. + +## Execution Steps (Do not skip any) + +1. **Inventory**: Walk the directory tree of the target plugin. Read all `SKILL.md` files, validation scripts, and workflows. + +2. **Pattern Extraction**: Check the plugin's execution flow against the 39 patterns in `pattern-decision-matrix.md`. Identify where the plugin *fails* to use a required pattern (e.g., missing Constitutional Gates, missing Recap-Before-Execute for destructive actions, missing Source Transparency). + > **Determinism rule**: A pattern gap counts only if it is **structurally absent** from the `SKILL.md` or scripts — not just underspecified. Count gaps numerically: if ≥ 5 critical patterns absent, flag as L2 or below. + +3. **Security Audit**: Look for: + - `shell=True` subprocess calls (command injection) + - Unquoted path variables (path traversal) + - Policy bypasses via state files + - Missing input sanitization on user-supplied arguments + +4. **Determinism Audit**: Flag qualitative text instructions (e.g., "if it looks bad, stop"). 
LLMs require strict formulas (e.g., "if error_count > 3, HALT"). Replace qualitative language with numeric thresholds. + +5. **Synthesis**: Write a Markdown report `[Plugin_Name]_Red_Team_Audit.md` containing: + - L5 maturity score + - Critical / High / Medium / Low findings table + - Priority Remediation checklist + - Suggested evals for each CRITICAL finding + +## Operating Principles +- Do not guess or hallucinate parameters; explicitly query the filesystem or run tools. +- Prefer deterministic validation sequences over static reasoning. +- Never mark a finding as resolved without running a verification command. + +## Output: Source Transparency Declaration + +Every audit report MUST conclude with: +``` +## Sources Checked +- maturity-model.md: [✅ Read / ❌ Not Found] +- security-checks.md: [✅ Read / ❌ Not Found] +- pattern-decision-matrix.md: [✅ Read / ❌ Not Found] +- [plugin directory files listed] + +## Sources Unavailable +- [any files that were referenced but not found] +``` diff --git a/plugins/agent-plugin-analyzer/research/anthropic-skills-learnings.md b/plugins/agent-plugin-analyzer/research/anthropic-skills-learnings.md new file mode 100644 index 00000000..65f1682b --- /dev/null +++ b/plugins/agent-plugin-analyzer/research/anthropic-skills-learnings.md @@ -0,0 +1,35 @@ +# Synthesis of Learnings: Anthropic Skills Repository + +**Source**: `https://github.com/anthropics/skills.git` +**Analyzed Skills**: `skill-creator`, `pdf`, `doc-coauthoring`, `mcp-builder` + +## 1. Executive Summary +A deep-dive analysis of the official Anthropic skills repository reveals significant advancements in how skills are structured, tested, and optimized. The introduction of rigorous evaluation loops, dynamic context fetching, and multi-agent testing workflows are patterns that should immediately be ported into our `agent-scaffolders` and `agent-plugin-analyzer`. + +## 2. Key Pattern Discoveries + +### A. 
The Evaluation & Benchmark Pattern (from `skill-creator`) +**Observation**: The `skill-creator` implements a software-development-like rigor for evaluating agent skills. It uses parallel sub-agents to run test prompts in a clean context, separating a "baseline" run (without the skill) from a "with-skill" run. It captures timing and token data, and it uses a secondary subagent (`grader.md`) to assert pass/fail criteria. +**Target Improvement**: Our `create-skill` scaffolder should scaffold an `evals/evals.json` alongside `references/` and integrate a basic grader or testing structure so that every skill we create is born testable. + +### B. The Context-Free Reader Testing Pattern (from `doc-coauthoring`) +**Observation**: For complex generation skills like documentation co-authoring, the skill explicitly spins up a fresh "Reader Claude" subagent that has absolutely no context from the current conversation. This subagent acts as a blind reviewer to catch false assumptions or missing context. +**Target Improvement**: `agent-scaffolders/skills/create-sub-agent` should include an option for a "Tainted Context Cleanser" or "Blind Reviewer" pattern. The `agent-plugin-analyzer` should look for this pattern in skills that generate persistent artifacts. + +### C. Trigger Description Optimization (from `skill-creator`) +**Observation**: Output quality is moot if the skill fails to trigger. `skill-creator` uses an automated loop to test the skill's description against 20 "should-trigger" and "should-not-trigger" prompts on a 60/40 train/test split. +**Target Improvement**: The `agent-skill-open-specifications` needs to mandate clear trigger testing. `create-skill` could generate a `trigger-evals.json`. + +### D. Dynamic Specification Fetching (from `mcp-builder`) +**Observation**: Instead of bundling massive specifications inside the skill, `mcp-builder` instructs the agent to use `WebFetch` to dynamically pull the latest MCP schema directly from `raw.githubusercontent.com`. 
+**Target Improvement**: `agent-scaffolders/skills/create-mcp-integration` should automatically inject WebFetch instructions to pull the latest SDK specs instead of relying on stale pre-trained knowledge. + +### E. Environment-Aware Degradation (from `skill-creator`) +**Observation**: The skill explicitly changes its workflow depending on where it's running (e.g., Cowork vs. Claude.ai vs. Claude Code), adjusting mechanisms like how it handles parallel sub-agents or local file HTML browser views. +**Target Improvement**: `agent-skill-open-specifications` should define an "Environment Awareness" standard, providing templates for how a skill should degrade gracefully if sub-agents or UI rendering aren't available. + +## 3. Next Steps & Recommendations + +1. **Update `pattern-catalog.md`**: Add the "Blind Reader Test", "Trigger Optimizer", and "Dynamic Context Fetch" to the catalog. +2. **Update `create-skill` Scaffolder**: Scaffold `evals/evals.json` and a `.gitignore` ignoring benchmark artifacts by default. +3. **Update Specs**: Incorporate these patterns into `ecosystem-authoritative-sources/reference/skills.md`. diff --git a/plugins/agent-plugin-analyzer/research/pdf-skill-learnings.md b/plugins/agent-plugin-analyzer/research/pdf-skill-learnings.md new file mode 100644 index 00000000..39aa7560 --- /dev/null +++ b/plugins/agent-plugin-analyzer/research/pdf-skill-learnings.md @@ -0,0 +1,26 @@ +# Synthesis of Learnings: Anthropic 'PDF' Skill + +**Source**: `https://github.com/anthropics/skills/tree/main/skills/pdf` +**Analyzed by**: `agent-plugin-analyzer` & `synthesize-learnings` + +## 1. Categorized Observations + +### A. Interaction Design & Procedural Guidance +- **Explicit Fallback Mechanisms**: For complex, brittle tasks (like filling non-fillable PDF forms), the skill uses a highly procedural fallback sequence documented in a dedicated file (`forms.md`). 
It explicitly tells the agent to try "Structure-Based Coordinates" first, and if that fails, fall back to "Visual Estimation". +- **Step-by-Step Validation**: The workflow enforces intermediate verification steps (e.g., `check_bounding_boxes.py`) before executing destructive or final actions (`fill_pdf_form_with_annotations.py`). This prevents catastrophic failures at the end of a long chain. + +### B. Progressive Disclosure +- **Routing over Instructing**: The main `SKILL.md` is surprisingly concise (315 lines), acting primarily as a quick-reference guide and router. For the most complex task (forms), it explicitly says: *"If you need to fill out a PDF form, read FORMS.md and follow its instructions."* + +### C. Script Bundling & Determinism +- **High Script-to-Doc Ratio**: The skill contains 8 Python scripts and only 3 markdown documents. Rather than relying on the LLM to write complex PDF coordinate translation math from scratch every time, it bundles deterministic, battle-tested Python scripts. + +## 2. Actionable Recommendations for Meta-Plugins + +### Enhancement for `agent-scaffolders/create-skill` +1. **Scaffold Fallback Trees**: When interviewing the user for a new skill, the scaffolder should explicitly ask: *"What are the common failure modes for this task, and what is the fallback sequence?"* It should then scaffold a dedicated markdown file (like `fallbacks.md` or `forms.md`) if the process is highly brittle. +2. **Promote Script Bundling**: Explicitly suggest bundling Python/Bash scripts for complex data transformations or geometric math, rather than relying on on-the-fly code generation. + +### Enhancement for `agent-plugin-analyzer` +1. **Detect Fallback Patterns**: Update the analyzer's anti-pattern detection to flag complex skills that *lack* explicit failure/fallback workflows. +2. **Script Density Score**: Analyze the ratio of executable scripts to markdown instructions. 
Skills with a high script density should trigger specialized security and complexity scoring. diff --git a/plugins/agent-plugin-analyzer/skills/analyze-plugin/SKILL.md b/plugins/agent-plugin-analyzer/skills/analyze-plugin/SKILL.md index 4b695c9e..4097ca6c 100644 --- a/plugins/agent-plugin-analyzer/skills/analyze-plugin/SKILL.md +++ b/plugins/agent-plugin-analyzer/skills/analyze-plugin/SKILL.md @@ -5,6 +5,7 @@ description: > and reusable techniques. Trigger with "analyze this plugin", "mine patterns from", "review plugin structure", "extract learnings from", "what patterns does this plugin use", or when examining any plugin or skill collection to understand its design. +allowed-tools: Bash, Read, Write --- # Plugin & Skill Analyzer @@ -161,4 +162,7 @@ Load the maturity model and scoring rubric from `references/maturity-model.md`. Generate a structured markdown report. For single plugins, output inline. For collections, create an artifact file with the full analysis. +**Iteration Directory Isolation**: All analysis reports must be saved into explicitly versioned and isolated outputs (e.g. `analysis-reports/target-run-1/`) to prevent destructive overrides on re-runs. +**Asynchronous Benchmark Metric Capture**: Once the audit run completes, immediately log the resulting `total_tokens` and `duration_ms` to a `timing.json` file to calculate the cost of the deep-dive analysis. + Always end with **Virtuous Cycle Recommendations**: specific, actionable improvements for `agent-plugin-analyzer` (this plugin), `agent-scaffolders`, and `agent-skill-open-specifications` based on patterns discovered. 
diff --git a/plugins/agent-plugin-analyzer/skills/analyze-plugin/evals/evals.json b/plugins/agent-plugin-analyzer/skills/analyze-plugin/evals/evals.json new file mode 100644 index 00000000..164ccbff --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/analyze-plugin/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-plugin-analyzer", + "skill": "analyze-plugin", + "evaluations": [ + { + "id": "eval-1-full-phase-execution", + "type": "positive", + "prompt": "Analyze the 'legacy-to-modern' plugin in my directory.", + "expected_behavior": "Agent executes all 6 phases of the analysis framework sequentially. Starts by running inventory_plugin.py, assesses structure, extracts patterns from SKILL.md, and concludes with Virtuous Cycle Recommendations." + }, + { + "id": "eval-2-strict-pattern-deduplication", + "type": "negative", + "prompt": "I found a new pattern: it asks the user for confirmation before deleting. Add it to the catalog.", + "expected_behavior": "Agent checks references/pattern-catalog.md, identifies this as the existing 'Confirmation Gate' pattern, and explicitly refuses to create a duplicate entry. Updates frequency instead." + }, + { + "id": "eval-3-security-first-evaluation", + "type": "positive", + "prompt": "Analyze this script for anti-patterns.", + "expected_behavior": "Agent executes the checks in references/security-checks.md FIRST before evaluating structural anti-patterns, adhering to the P0 severity ordering rule." + }, + { + "id": "eval-4-missing-inventory-script", + "type": "edge-case", + "prompt": "Analyze this plugin (but inventory_plugin.py is deleted).", + "expected_behavior": "Agent gracefully falls back to the manual 4-step enumeration process defined in Phase 1, building a structured checklist of all files instead of hard crashing." 
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/fallback-tree.md b/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/fallback-tree.md new file mode 100644 index 00000000..d5dfece6 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Plugin Analyzer + +## 1. inventory_plugin.py Fails or is Missing +If `scripts/inventory_plugin.py` throws an error, returns empty, or is not executable: +- **Action**: Do not abort the analysis. Fall back to the manual directory walk described in Phase 1. Use standard file reading capabilities (`ls`, `find`, or tool-specific equivalents) to build the structured inventory checklist. + +## 2. Plugin Contains No SKILL.md Files +If the target directory is just code scripts with no defined Agent Skills: +- **Action**: Adapt the framework. Note the lack of skills in Phase 2 (Structure Analysis). Skip the SKILL.md checks in Phase 3, and focus entirely on Script evaluation and Security Checks. Score the plugin heavily down on the Progressive Disclosure metric. + +## 3. Ambiguous Anti-Pattern Detection +If code looks suspicious but doesn't perfectly match the definitions in `references/security-checks.md`: +- **Action**: Do not auto-fail the security check. Flag it as an "Unclassified Risk" in Phase 5 and explicitly recommend that the user manually review the code snippet, or route the file to the `audit-plugin-l5` Red Team subagent for deeper analysis. + +## 4. Output Token Limit Reached +If analyzing a massive plugin causes the LLM to approach context/output limits before Phase 6: +- **Action**: Pause the generation. Issue a "Part 1 Complete" status, summarize findings so far, and instruct the user to type "Continue" to execute the remaining phases (Anti-Pattern & Scoring). 
diff --git a/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/security-checks.md b/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/security-checks.md index 316e60e1..076af3d3 100644 --- a/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/security-checks.md +++ b/plugins/agent-plugin-analyzer/skills/analyze-plugin/references/security-checks.md @@ -17,6 +17,7 @@ Reference file for Phase 5 security analysis. These checks run FIRST (P0) before | Nested references | Reference files that link to other reference files | Warning | | Skill scope creep | Single SKILL.md with >3 distinct workflows | Warning | | Missing CONNECTORS.md | Plugin uses MCP tools but no connector abstraction | Warning | +| Brittle Style Payloads | Passive style skills listing hex codes without Syntax Translation Routing (e.g. CSS vs Matplotlib mappings) | Warning | ## Security Checks (P0 — Check These First) @@ -24,6 +25,8 @@ Reference file for Phase 5 security analysis. These checks run FIRST (P0) before |---------------|-------------------|----------| | Unauthorized network calls | `curl`, `requests`, `urllib`, `fetch` in scripts | Critical | | Prompt injection surfaces | User-controlled content injected into prompts without sanitization | Critical | +| Unbounded Client-Side Compute | Generating HTML/JS artifacts or recursive algorithms without a hardcoded execution sandbox | Critical | +| Artifact XSS Generation | Generating HTML artifacts without explicit network or strict DOM compliance gate instructions | Critical | | Overly permissive tool lists | Sub-agents with unrestricted tool access | Critical | | Hardcoded credentials | API keys, tokens, passwords in any file | Critical | | Data exfiltration risk | Discovery phases that gather sensitive data without boundaries | Error | diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/CONNECTORS.md b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/CONNECTORS.md new file mode 100644 index 
00000000..f66ba06a --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/CONNECTORS.md @@ -0,0 +1 @@ +# audit-plugin-l5 Connectors Map\n\nMap abstract `~~category` tool requirements to exact system dependencies here to keep the plugin portable. \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/SKILL.md b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/SKILL.md new file mode 100644 index 00000000..7b7e8764 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/SKILL.md @@ -0,0 +1,39 @@ +--- +name: audit-plugin-l5 +description: Triggers the L5 Red Team Sub-Agent to rigorously audit a plugin against the 39-point L4 pattern matrix. +disable-model-invocation: false +allowed-tools: Bash, Read, Write +--- + +# Audit Plugin L5 +[See acceptance criteria](references/acceptance-criteria.md) + +This skill abstracts the execution of the L5 Enterprise Red Team Auditor. By using this skill, you trigger an uncompromising architecture and security review against the 39-point pattern matrix. + +## Discovery Phase +Before executing this skill, ensure you know the exact path or name of the plugin you wish to audit (e.g., `plugins/legacy system/xml-to-markdown`). + +## Execution +This skill delegates immediately to the `l5-red-team-auditor` sub-agent. + +**Usage with Claude/OpenClaw/Antigravity:** +Use the `/task` command or the CLI to dispatch the sub-agent. + +```bash +# If using the CLI directly: +claude -p l5-red-team-auditor "Please deeply assess the plugin located at: plugins/[INSERT_PLUGIN_NAME_HERE]" +``` + +## Output +The sub-agent is instructed to output a structured markdown artifact titled `[Plugin_Name]_Red_Team_Audit.md` containing: +1. L5 Maturity gaps. +2. Bypass vectors and injection paths. +3. Determinism failures. +4. Priority Remediation Checklists. 
+ +Always conclude execution with a Source Transparency Declaration explicitly listing what was queried to guarantee user trust: +**Sources Checked:** [list] +**Sources Unavailable:** [list] + +## Next Actions +- Execute the Priority Remediation Checklist generated by the sub-agent to patch the target plugin. diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/audit-plugin-l5-flow.mmd b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/audit-plugin-l5-flow.mmd new file mode 100644 index 00000000..fb8087db --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/audit-plugin-l5-flow.mmd @@ -0,0 +1,5 @@ +stateDiagram-v2 + [*] --> Init + Init --> Process : Execute audit-plugin-l5 + Process --> [*] + \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/evals/evals.json b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/evals/evals.json new file mode 100644 index 00000000..e79b09c3 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-plugin-analyzer", + "skill": "audit-plugin-l5", + "evaluations": [ + { + "id": "eval-1-subagent-dispatch", + "type": "positive", + "prompt": "Audit the 'csv-to-excel' plugin.", + "expected_behavior": "Agent correctly identifies the target plugin path and successfully dispatches the `l5-red-team-auditor` sub-agent to execute the actual review." + }, + { + "id": "eval-2-missing-target-path", + "type": "negative", + "prompt": "Run an L5 audit.", + "expected_behavior": "Agent blocks the subagent dispatch. Explicitly asks the user which plugin directory they want audited, as per the Discovery Phase constraints." 
+ }, + { + "id": "eval-3-enforce-source-transparency", + "type": "edge-case", + "prompt": "Give me the final L5 audit report for the math-helper plugin.", + "expected_behavior": "Alongside the sub-agent's findings, the agent strictly outputs the 'Source Transparency Declaration' listing exactly which files were successfully checked and which were missing/unavailable." + }, + { + "id": "eval-4-subagent-boot-failure", + "type": "negative", + "prompt": "Audit this plugin (while assuming nested agents are disabled in this environment).", + "expected_behavior": "Agent surfaces the dispatch error (e.g., auth failure or unsupported environment). Agent guides the user to invoke the Red Team review manually via CLI copy-paste as instructed in the fallback tree." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/acceptance-criteria.md b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/acceptance-criteria.md new file mode 100644 index 00000000..d6e96dcd --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/acceptance-criteria.md @@ -0,0 +1 @@ +# Acceptance Criteria: audit-plugin-l5\n\nDefine at least two testable criteria or correct/incorrect operational patterns here to ensure the skill functions correctly. \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/architecture.md b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/architecture.md new file mode 100644 index 00000000..073b35af --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/architecture.md @@ -0,0 +1 @@ +# audit-plugin-l5 Protocol Reference\n\nPut deep context here so it is not loaded into context implicitly. 
\ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/fallback-tree.md b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/fallback-tree.md new file mode 100644 index 00000000..65c42713 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: L5 Red Team Auditor + +## 1. Sub-Agent Dispatch Fails (Auth/Permissions) +If the environment (like Claude Code) blocks the execution of `claude -p l5-red-team-auditor` or the subagent errors out on boot: +- **Action**: Do not attempt to simulate the 39-point matrix yourself within the current context. Provide the user with the exact CLI command and instruct them to run it manually in a separate terminal. + +## 2. Target Directory Does Not Exist +If the user requests an audit on a plugin name that cannot be found locally: +- **Action**: Terminate the dispatch sequence. Run a local directory search to find similar names. Offer the corrected paths to the user before proceeding. + +## 3. Sub-Agent Output is Garbled/Truncated +If the `l5-red-team-auditor` returns a malformed report that misses the required checklists or transparency declarations: +- **Action**: Treat the audit as INCOMPLETE. Warn the user that the sub-agent context likely blew out. Recommend running the analysis on individual sub-components (e.g., just the `scripts/` folder) instead of the whole plugin. + +## 4. Red Team Finds Zero Flaws +If the sub-agent returns a perfect L5 score on a complex plugin: +- **Action**: Flag the review as suspiciously shallow. Verify that the sub-agent actually read the `scripts/` directory and didn't just parse the `SKILL.md` frontmatter. Prompt the user to double-check the `Sources Checked` transparency list. 
diff --git a/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/scripts/execute.py b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/scripts/execute.py new file mode 100755 index 00000000..613b9409 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/audit-plugin-l5/scripts/execute.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +import argparse +import sys + +def main(): + parser = argparse.ArgumentParser(description="Triggers the L5 Red Team Sub-Agent to rigorously audit a plugin against the 39-point L4 pattern matrix.") + # Add your arguments here + parser.add_argument("--example", help="Example argument") + + args = parser.parse_args() + + print("Executing audit-plugin-l5 logic...") + # Add your logic here + +if __name__ == "__main__": + main() diff --git a/plugins/agent-plugin-analyzer/skills/synthesize-learnings/SKILL.md b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/SKILL.md index b16de827..4dd8099d 100644 --- a/plugins/agent-plugin-analyzer/skills/synthesize-learnings/SKILL.md +++ b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/SKILL.md @@ -5,6 +5,7 @@ description: > and agent-skill-open-specifications. Trigger with "synthesize learnings", "generate improvement recommendations", "what should we improve in our scaffolders", "update our meta-skills based on these findings", or after completing a plugin analysis. +allowed-tools: Bash, Read, Write --- # Synthesize Learnings @@ -139,4 +140,7 @@ The synthesis report should be a standalone markdown document suitable for: - Using as a briefing for planning sessions - Driving specific PRs against the scaffolders and specs +**Iteration Directory Isolation**: Do NOT overwrite existing synthesis reports. Always output to a newly isolated directory (e.g. `synthesis-reports/run-1/`) so historical recommendations are preserved. 
+**Asynchronous Benchmark Metric Capture**: Log the `total_tokens` and `duration_ms` consumed during the synthesis back to `timing.json` to track the ROI cost of this meta-analysis. + Always close with a **Next Steps** section listing the 3 most impactful changes to make first. diff --git a/plugins/agent-plugin-analyzer/skills/synthesize-learnings/evals/evals.json b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/evals/evals.json new file mode 100644 index 00000000..22e53bb8 --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-plugin-analyzer", + "skill": "synthesize-learnings", + "evaluations": [ + { + "id": "eval-1-full-synthesis", + "type": "positive", + "prompt": "Synthesize the learnings from the pdf-skill analysis.", + "expected_behavior": "Agent takes the raw analysis, categorizes observations into the 9 core categories, maps them to the 4 targets (scaffolders, specs, analyzer, domain), and outputs structured markdown recommendations." + }, + { + "id": "eval-2-pattern-deduplication", + "type": "negative", + "prompt": "I saw the pdf-skill uses HTML artifacts. Please add this brand new pattern to the catalog.", + "expected_behavior": "Agent consults the existing pattern-catalog.md, refuses to add it as a 'new' pattern because it already exists, and instead notes its frequency mapping in the summary report." + }, + { + "id": "eval-3-missing-input-analysis", + "type": "negative", + "prompt": "Generate improvement recommendations.", + "expected_behavior": "Agent refuses to synthesize because it has not been provided context (either raw analysis in chat or a path to a specific analysis .md file). It prompts the user for the input material." + }, + { + "id": "eval-4-prioritization-adherence", + "type": "edge-case", + "prompt": "The legacy code analysis module has a minor formatting issue. 
Treat this as critical.", + "expected_behavior": "Agent re-classes the priority to 'Low' according to the priority matrix (Niche pattern from specific domain), overriding the user's manual critical designation, to protect ecosystem roadmap purity." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-plugin-analyzer/skills/synthesize-learnings/references/fallback-tree.md b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/references/fallback-tree.md new file mode 100644 index 00000000..94353b8a --- /dev/null +++ b/plugins/agent-plugin-analyzer/skills/synthesize-learnings/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: Synthesize Learnings + +## 1. Raw Analysis Context is Too Large +If the user dumps 5 massive analysis reports from `analyze-plugin` into the chat and it causes context limits or truncation: +- **Action**: Do not attempt to synthesize them all blindly. Break them down. Instruct the user to pass them one at a time, or write a summary script to compress the structural findings before feeding them into the synthesis engine. + +## 2. Incompatible Analysis Format +If the user provides an unstructured text dump or an old version of an analysis report that lacks the explicit 6-phase output: +- **Action**: Gracefully map what you can to the 9 categories. Explicitly state the gaps in the synthesis report (e.g., "Note: Analysis lacked Phase 5 Security Checks, so no scaffold recommendations generated for security"). + +## 3. Pattern Catalog Write Conflict (Read-Only FS) +If attempting to append newly discovered patterns to `references/pattern-catalog.md` fails due to filesystem permissions: +- **Action**: Output the formatted new pattern entries directly in the executive summary of the synthesis report with a message instructing the user to manually append them to the catalog file. + +## 4.
Unmapped Sub-Domain +If an observation clearly implies a meta-skill improvement but doesn't map cleanly to `scaffolders`, `specs`, or `analyzer`: +- **Action**: Map it to `Specs` as a generalized "New Ecosystem Standard" recommendation and flag it for human review. Do not silently discard raw learnings. diff --git a/plugins/agent-scaffolders/.claude-plugin/plugin.json b/plugins/agent-scaffolders/.claude-plugin/plugin.json index d1708a75..a3eeb031 100644 --- a/plugins/agent-scaffolders/.claude-plugin/plugin.json +++ b/plugins/agent-scaffolders/.claude-plugin/plugin.json @@ -1,9 +1,29 @@ { - "version": "1.0", "name": "agent-scaffolders", - "author": "Antigravity", - "description": "A suite of generative agent skills designed to dynamically scaffold new extensions (Plugins, Skills, Hooks, etc.) in strict compliance with Open Standard specifications.", - "dependencies": [ - "agent-skill-open-specifications" + "version": "2.0.0", + "description": "Meta-plugin containing the ecosystem generation primitives. 
Includes scaffolding for Agent Skills, Plugins, CLI sub-agents, autonomous GitHub workflows, Azure Foundry agents, and more.", + "author": { + "name": "Richard Fremmerlid", + "url": "https://github.com/richfrem" + }, + "repository": "https://github.com/richfrem/agent-plugins-skills", + "license": "MIT", + "skills": [ + "audit-plugin", + "create-agentic-workflow", + "create-azure-agent", + "create-docker-skill", + "create-github-action", + "create-hook", + "create-legacy-command", + "create-mcp-integration", + "create-plugin", + "create-skill", + "create-stateful-skill", + "create-sub-agent" + ], + "scripts": [ + "scripts/audit.py", + "scripts/scaffold.py" ] } \ No newline at end of file diff --git a/plugins/agent-scaffolders/references/hitl-interaction-design.md b/plugins/agent-scaffolders/references/hitl-interaction-design.md index facfcb2f..a6888774 100644 --- a/plugins/agent-scaffolders/references/hitl-interaction-design.md +++ b/plugins/agent-scaffolders/references/hitl-interaction-design.md @@ -118,6 +118,7 @@ Does this look right? (yes / adjust) - **Annotated JSON**: JSON with comments explaining each section - **Report + Data**: Markdown narrative with embedded code blocks containing structured data - **Interactive HTML + Export**: Dashboard with CSV/JSON download buttons +- **Scaffold Previewer**: Local lightweight HTML server UI for side-by-side output review before destructive disk writes (Source: Anthropic `skill-creator`) ### When to Negotiate Format Add format negotiation to a skill when: diff --git a/plugins/agent-scaffolders/references/pattern-decision-matrix.md b/plugins/agent-scaffolders/references/pattern-decision-matrix.md index 52705996..d6f8ab3e 100644 --- a/plugins/agent-scaffolders/references/pattern-decision-matrix.md +++ b/plugins/agent-scaffolders/references/pattern-decision-matrix.md @@ -20,6 +20,8 @@ Not every skill needs complex architectural patterns. 
Use this tree during the d | Does the user report surface symptoms that need root-cause diagnosis? | **Anti-Symptom Triage** | `anti-symptom-triage.md` | | Does the command group several sub-operations that have different outputs? | **Sub-Action Multiplexing** | `sub-action-multiplexing.md` | | Does the command require user input upstream where asking questions mid-flight hurts UX? | **Pre-Execution Input Manifest** | `pre-execution-input-manifest.md` | +| Does the skill share overlapping keywords with generic tools, potentially causing discoverability issues? | **Multi-Variant Trigger Optimizer** | `multi-variant-trigger-optimizer.md` | +| Does the skill inherently struggle with undertriggering due to generic namespace intent vs actual semantic queries? | **Trigger Description Optimization Loop** | `trigger-description-optimization-loop.md` (Source: Anthropic `skill-creator`) | ### Category 2: Execution and Safety | Diagnostic Question | Required Pattern | File | @@ -36,6 +38,11 @@ Not every skill needs complex architectural patterns. Use this tree during the d | Will the agent's natural sycophancy (agreeableness) ruin the analysis? | **Adversarial Objectivity Constraint** | `adversarial-objectivity-constraint.md` | | Is the command modifying constrained additive resources (dashboards, capacity)? | **Zero-Sum Addition Gate** | `zero-sum-addition-gate.md` | | Is there a minimum compliance safety standard that must never be bypassed regardless of the execution path or tool availability? | **Mode-Invariant Compliance Gate** | `mode-invariant-compliance-gate.md` | +| Is the primary method for the task highly brittle or prone to edge-case failures (e.g. math, geometric extraction)? | **Highly Procedural Fallback Trees** | `highly-procedural-fallback-trees.md` | +| Does the skill write code, configurations, or formulas that can be definitively proven broken by a compiler or engine evaluation? 
| **Delegated Constraint Verification Loop** | `delegated-constraint-verification-loop.md` | +| Does the skill write executable code or loops destined to run directly on the client/browser? | **Client-Side Compute Sandbox Constraint** | `client-side-compute-sandbox-constraint.md` | +| Does the generation output directly to a working directory where mistaken rollback is impossible without git reset? | **Iteration Directory Isolation** | `iteration-directory-isolation.md` (Source: Anthropic `skill-creator`) | +| Is critical timing or token benchmark data emitted asynchronously via system notifications rather than final outputs? | **Asynchronous Benchmark Metric Capture** | `asynchronous-benchmark-metric-capture.md` (Source: Anthropic `skill-creator`) | ### Category 3: Output and Contracts | Diagnostic Question | Required Pattern | File | @@ -47,7 +54,6 @@ Not every skill needs complex architectural patterns. Use this tree during the d | Does the agent need to understand context incrementally rather than dumping 50 files into memory at once? | **Progressive Disclosure** | `progressive-disclosure.md` | | Does the plugin use placeholders that need to be universally understood by distributed users? | **Category-Semantic Deferred Tool Binding** | `category-semantic-deferred-tool-binding.md` | | Does the artifact's existing configuration state determine what the workflow should do? | **Artifact-State-Interrogative Routing** | `artifact-state-interrogative-routing.md` | -| Does the agent need to express exactly where it looked vs where it didn't? | **Source Transparency** | `source-transparency.md` | | Does the output need special handling (e.g., privileged, confidential)?| **Output Classification** | `output-classification.md` | | Does the command produce written communications (emails, chat)? | **Multi-Dimensional Tone** | `multi-dimensional-tone.md` | | Is the output a priority ranking that requires mathematical determinism? 
| **Embedded Deterministic Scoring Formula** | `embedded-deterministic-scoring-formula.md` | @@ -56,7 +62,13 @@ Not every skill needs complex architectural patterns. Use this tree during the d | Does the tool produce a strategic analysis that requires the user to decide? | **Mandatory Counterfactual Scenario Templating** | `mandatory-counterfactual-scenario-templating.md` | | Does the primary stakeholder lack context needed to understand raw metrics? | **Impact-Translated Status** | `impact-translated-status.md` | | Does the organization have an expected statistical distribution or budget curve for these entities? | **Population-Normative Distribution Constraint** | `population-normative-distribution-constraint.md` | +| Does the LLM have a strong innate bias to solve the problem the "wrong" way (e.g., calculating math in Python instead of writing a formula)? | **Negative Instruction Constraint** | `negative-instruction-constraint.md` | | Does the skill evaluate metrics that require external industry benchmarks rather than the agent's subjective judgment? | **Category-Calibrated Benchmark Anchoring** | `category-calibrated-benchmark-anchoring.md` | +| Will the generated output be consumed by fresh readers lacking the agent's conversational context? | **Tainted Context Cleanser** | `tainted-context-cleanser.md` | +| Does the output's quality or performance need to be provably benchmarked against baselines? | **Rigorous Benchmarking & Grading Loop** | `rigorous-benchmarking-loop.md` | +| Does the command generate full UI artifacts (HTML/SVG) where external asset injection poses a security risk? | **Artifact Generation XSS Compliance Gate** | `artifact-generation-xss-compliance-gate.md` | +| Are generated UI artifacts or whole file hierarchies difficult for the user to review purely in code before saving? 
| **Local Interactive Output Viewer Loop** | `local-interactive-output-viewer-loop.md` (Source: Anthropic `skill-creator`) | +| Does the interaction pop local browsers or servers that will crash in remote VMs or headless subagent loops? | **UI Degradation Constraint** | `ui-degradation-constraint.md` (Source: Anthropic `skill-creator`) | ### Category 4: State and Knowledge | Diagnostic Question | Required Pattern | File | @@ -66,13 +78,15 @@ Not every skill needs complex architectural patterns. Use this tree during the d | Is the domain highly regulated (laws, specific numeric thresholds)? | **Temporal Anchoring** | `temporal-anchoring.md` | | Does the skill generate living documents (e.g., KBs, playbooks)? | **Lifecycle-Aware Knowledge** | `lifecycle-aware-knowledge.md` | | Does the skill create artifact files? | **Artifact Lifecycle** | `artifact-lifecycle.md` | +| Should branding, styling, or tone rules be shared globally across multiple distinct generation skills? | **Passive Style Injection Payload** | `passive-style-injection-payload.md` | | Does the workflow require complex knowledge gathering from multiple sources? | **Graduated Source-Attributed Knowledge Elicitation** | `graduated-source-attributed-elicitation.md` | | Is there a risk that the user will be overwhelmed by technical file-path/YAML minutiae? | **Dual-Register Communication Enforcement** | `dual-register-communication-enforcement.md` | -| Does the skill synthesize an answer based on multiple competing sources?| **Tiered Source Authority**| `tiered-source-authority.md` | | Should the command point the user to the next logical step in a workflow?| **Chained Command Invocation**| `chained-command-invocation.md` | | Do the commands require configuration that is tedious to supply on every run? | [Persistent Plugin Configuration](persistent-plugin-configuration.md) | | Does the workflow happen in recurring, time-bounded periods where the previous output is the next input? 
| [Cyclical State Propagation Contract](cyclical-state-propagation-contract.md) | | Should the generated artifact structurally record its own procedural history? | **Artifact-Embedded Execution Audit Trail** | `artifact-embedded-execution-audit-trail.md` | +| Does the skill require orchestrating against an external SDK or schema that updates frequently? | **Dynamic Specification Fetching** | `dynamic-specification-fetching.md` | +| Does the command generate randomized or chaotic output that a user might want to exactly replicate later? | **Explicit Seed-Anchored Determinism** | `explicit-seed-anchored-determinism.md` | --- @@ -80,10 +94,10 @@ Not every skill needs complex architectural patterns. Use this tree during the d If a pattern is triggered and loaded, you must perform **Progressive Disclosure Injection** into the generated skill: -1. **Do not bloat the `SKILL.md`** with the full theory of the pattern. -2. Create a lean reference file in the new skill's `references/` directory (e.g. `references/escalation-rules.md`). -3. Populate that new reference file with ONLY the concrete, domain-specific tables and rules requested by the pattern definition. -4. Add a markdown link in the new `SKILL.md` pointing to this newly generated reference file so the runtime agent knows to load it when executing. +1. **Do not bloat the `SKILL.md`** with the full theory of the pattern. +2. Create a lean reference file in the new skill's `references/` directory (e.g. `references/escalation-rules.md`). +3. Populate that new reference file with ONLY the concrete, domain-specific tables and rules requested by the pattern definition. +4. Add a markdown link in the new `SKILL.md` pointing to this newly generated reference file so the runtime agent knows to load it when executing. This mechanism ensures that new skills possess L4 statefulness and safety boundaries without violating the 500-line `SKILL.md` context constraint. 
## L4 Pattern Reference Catalog @@ -297,6 +311,47 @@ Once a pattern is triggered by the decision tree above, load the corresponding f - **Use Case:** Multi-phase commands where users benefit from understanding the whole process upfront or where the agent proves prone to skipping steps. - **Core Mechanic:** Every command opens with an ASCII flowchart visual diagram mapping the process steps before any logic evaluates, committing the agent structurally to that process. +### Concept-Dialect Translation Table +- **File:** `concept-dialect-translation-table.md` +- **Use Case:** Integrating external systems (like Notion or Jira) whose internal terminology differs from your domain terminology. +- **Core Mechanic:** A literal Markdown table in `CONNECTORS.md` that maps internal domain concepts to external system equivalents, so the agent can naturally "speak" the target API's dialect. + +### Category-Semantic Deferred Tool Binding +- **File:** `category-semantic-deferred-tool-binding.md` +- **Use Case:** Writing portable templates expected to be deployed across vastly different technical ecosystems. +- **Core Mechanic:** Using human-readable categories as `~~` placeholder tokens, effectively turning the placeholder itself into the discovery keyword for registry lookups. + +### Artifact-State-Interrogative Routing +- **File:** `artifact-state-interrogative-routing.md` +- **Use Case:** Workflows that modify existing plugins or configurations that may exist in various lifecycles. +- **Core Mechanic:** Executing a fast read-only inspection command against the artifact before user interaction to determine its lifecycle state and hard-route the workflow mode. + +### Dual-Register Communication Enforcement +- **File:** `dual-register-communication-enforcement.md` +- **Use Case:** Technical manipulation workflows for non-technical users. 
+- **Core Mechanic:** Forcing a strict boundary where the agent uses technical paths/tokens internally, but ONLY ever emits semantic, capability-framed language in user-facing artifacts and summaries. + +### Graduated Source-Attributed Knowledge Elicitation +- **File:** `graduated-source-attributed-elicitation.md` +- **Use Case:** Multi-step knowledge gathering processes. +- **Core Mechanic:** Searching systems in priority order to minimize questioning, and tracking exact provenance of every variable so the final summary proves *where* the agent learned the fact, guaranteeing transparency. + +### Progressive Disclosure +- **File:** `progressive-disclosure.md` +- **Use Case:** Coping with large architectures or domain rules. +- **Core Mechanic:** Splitting knowledge out of the primary `SKILL.md` (which is always loaded into context window) into `references/*.md`, mapped to specific triggers so they only load when strictly necessary. +- **Evolution:** *Tiered Progressive Disclosure with Explicit Budget Constraints* — Implementing hard token/word count budgets per progressive disclosure tier. + +### Zero-Sum Addition Gate +- **File:** `zero-sum-addition-gate.md` +- **Use Case:** Sprint planning, roadmap management, staffing changes, or any system where resources are finite. +- **Core Mechanic:** A pre-action capacity constraint that evaluates resource limits and forbids the agent from blindly executing an additive operation without forcing a subtractive trade-off decision from the user. + +### Mode-Invariant Compliance Gate +- **File:** `mode-invariant-compliance-gate.md` +- **Use Case:** Domains where missing a safety/compliance step due to a conditional logic skip is unacceptable. +- **Core Mechanic:** A structurally isolated block of mandatory checks that are declared immune to all conditional execution pathways, forming a compliance floor that runs on every invocation. 
+ ### Category-Calibrated Benchmark Anchoring - **File:** `category-calibrated-benchmark-anchoring.md` - **Use Case:** Evaluating metrics against industry-standard categories rather than relying on generative AI hallucination. diff --git a/plugins/agent-scaffolders/scripts/scaffold.py b/plugins/agent-scaffolders/scripts/scaffold.py index 5cd646a2..8355aab4 100644 --- a/plugins/agent-scaffolders/scripts/scaffold.py +++ b/plugins/agent-scaffolders/scripts/scaffold.py @@ -50,11 +50,16 @@ - Agent Scaffolders logic (create-plugin, create-skill, etc.) """ -def create_plugin(name, path): +def create_plugin(name, path, iteration=None): if not re.match(r'^[a-z0-9-]+$', name): print(f"Error: Plugin name '{name}' must contain only lowercase letters, numbers, and hyphens.") return - full_path = os.path.join(path, name) + + if iteration: + full_path = os.path.join(path, ".history", f"iteration-{iteration}", name) + else: + full_path = os.path.join(path, name) + claude_plugin_dir = os.path.join(full_path, ".claude-plugin") os.makedirs(claude_plugin_dir, exist_ok=True) @@ -124,7 +129,7 @@ def get_template(filename): print(f"Success: Plugin '{name}' scaffolded at {full_path}") -def create_skill(name, path, description): +def create_skill(name, path, description, iteration=None): if not re.match(r'^[a-z0-9-]+$', name): print(f"Error: Skill name '{name}' must contain only lowercase letters, numbers, and hyphens.") return @@ -132,7 +137,11 @@ def create_skill(name, path, description): print(f"Error: Skill name '{name}' exceeds 64 characters.") return - skill_dir = os.path.join(path, name) + if iteration: + skill_dir = os.path.join(path, ".history", f"iteration-{iteration}", name) + else: + skill_dir = os.path.join(path, name) + scripts_dir = os.path.join(skill_dir, "scripts") references_dir = os.path.join(skill_dir, "references") examples_dir = os.path.join(skill_dir, "examples") @@ -255,6 +264,7 @@ def create_sub_agent(name, path, desc): print(f"Error: Sub-agent name '{name}' 
exceeds 64 characters.") return full_path = os.path.join(path, f"{name}.md") + os.makedirs(os.path.dirname(full_path), exist_ok=True) def get_template(filename): template_path = os.path.join(os.path.dirname(__file__), "..", "templates", filename) @@ -285,6 +295,7 @@ def create_command(name, path, desc): print(f"Error: Command name '{name}' exceeds 64 characters.") return full_path = os.path.join(path, f"{name}.md") + os.makedirs(os.path.dirname(full_path), exist_ok=True) def get_template(filename): template_path = os.path.join(os.path.dirname(__file__), "..", "templates", filename) @@ -315,13 +326,14 @@ def main(): parser.add_argument("--desc", default="A generated resource.", help="Description for skills or agents") parser.add_argument("--event", default="PreToolUse", help="Lifecycle event for hooks") parser.add_argument("--action", default="command", choices=["command", "prompt", "agent"], help="Hook action type") + parser.add_argument("--iteration", type=int, help="Iteration number for safe rollback isolation (e.g., 1, 2)") args = parser.parse_args() if args.type == "plugin": - create_plugin(args.name, args.path) + create_plugin(args.name, args.path, args.iteration) elif args.type == "skill": - create_skill(args.name, args.path, args.desc) + create_skill(args.name, args.path, args.desc, args.iteration) elif args.type == "hook": create_hook(args.event, args.path, args.action) elif args.type == "sub-agent": diff --git a/plugins/agent-scaffolders/skills/audit-plugin/SKILL.md b/plugins/agent-scaffolders/skills/audit-plugin/SKILL.md index 843a29d2..6e2263cc 100644 --- a/plugins/agent-scaffolders/skills/audit-plugin/SKILL.md +++ b/plugins/agent-scaffolders/skills/audit-plugin/SKILL.md @@ -2,6 +2,7 @@ name: audit-plugin description: Audits a local plugin directory to ensure it perfectly matches the Agent Skills and Claude Plugin Open Standards. 
disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Ecosystem Auditor @@ -30,6 +31,8 @@ python3 "plugins reference/agent-plugin-analyzer/skills/analyze-plugin/scripts/a **Remediation & Next Steps:** If the script outputs a low Maturity Score or fails the `--security` gate (which forces an immediate `sys.exit(1)`), you MUST read the generated output report and actively use your file editing tools to fix the compliance issues in the target plugin. Run the audit again until it achieves Level 3 or higher. + + ## Next Actions - Offer to run `create-skill` to fix identified gaps. - Offer to run `create-stateful-skill` to upgrade to L4 maturity. diff --git a/plugins/agent-scaffolders/skills/audit-plugin/evals/evals.json b/plugins/agent-scaffolders/skills/audit-plugin/evals/evals.json new file mode 100644 index 00000000..d326c9d9 --- /dev/null +++ b/plugins/agent-scaffolders/skills/audit-plugin/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "audit-plugin", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the audit-plugin command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for audit-plugin without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new audit-plugin.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the audit-plugin process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded."
+ }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate an audit-plugin named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/audit-plugin/references/fallback-tree.md b/plugins/agent-scaffolders/skills/audit-plugin/references/fallback-tree.md new file mode 100644 index 00000000..39e99171 --- /dev/null +++ b/plugins/agent-scaffolders/skills/audit-plugin/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: audit-plugin + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4.
Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-agentic-workflow/SKILL.md b/plugins/agent-scaffolders/skills/create-agentic-workflow/SKILL.md index 66706a5f..b0a01787 100644 --- a/plugins/agent-scaffolders/skills/create-agentic-workflow/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-agentic-workflow/SKILL.md @@ -1,6 +1,7 @@ --- name: create-agentic-workflow description: Scaffold GitHub Agent files from an existing Agent Skill. Generates IDE/UI agents (invokable from GitHub Copilot Chat via slash command) and/or CI/CD autonomous agents (GitHub Actions quality gates with Kill Switch). Use when converting a Skill into a GitHub-native agent. +allowed-tools: Bash, Read, Write --- # GitHub Agent Scaffolder diff --git a/plugins/agent-scaffolders/skills/create-agentic-workflow/evals/evals.json b/plugins/agent-scaffolders/skills/create-agentic-workflow/evals/evals.json new file mode 100644 index 00000000..cf57cccc --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-agentic-workflow/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-agentic-workflow", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-agentic-workflow command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-agentic-workflow without hallucinating missing files."
+ }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-agentic-workflow.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-agentic-workflow process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-agentic-workflow named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-agentic-workflow/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-agentic-workflow/references/fallback-tree.md new file mode 100644 index 00000000..d7a08839 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-agentic-workflow/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-agentic-workflow + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. 
Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-azure-agent/evals/evals.json b/plugins/agent-scaffolders/skills/create-azure-agent/evals/evals.json new file mode 100644 index 00000000..ba2645da --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-azure-agent/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-azure-agent", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-azure-agent command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-azure-agent without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-azure-agent.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts."
+ }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-azure-agent process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-azure-agent named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-azure-agent/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-azure-agent/references/fallback-tree.md new file mode 100644 index 00000000..4379471a --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-azure-agent/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-azure-agent + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. 
Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-docker-skill/SKILL.md b/plugins/agent-scaffolders/skills/create-docker-skill/SKILL.md index 9ea496be..fdc7a50e 100644 --- a/plugins/agent-scaffolders/skills/create-docker-skill/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-docker-skill/SKILL.md @@ -2,6 +2,7 @@ name: create-docker-skill description: Interactive initialization script that generates a compliant Agent Skill containing pre-flight environment checks, subprocess execution scaffolding, and a security-override config. Use when authoring new workflow routines that depend on external containerized runtimes (e.g., Docker, Nextflow, HPC). 
disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Dockerized Skill Scaffold Generator diff --git a/plugins/agent-scaffolders/skills/create-docker-skill/evals/evals.json b/plugins/agent-scaffolders/skills/create-docker-skill/evals/evals.json new file mode 100644 index 00000000..d7edaae9 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-docker-skill/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-docker-skill", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-docker-skill command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-docker-skill without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-docker-skill.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-docker-skill process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-docker-skill named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters."
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-docker-skill/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-docker-skill/references/fallback-tree.md new file mode 100644 index 00000000..9fd50396 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-docker-skill/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-docker-skill + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". 
diff --git a/plugins/agent-scaffolders/skills/create-github-action/SKILL.md b/plugins/agent-scaffolders/skills/create-github-action/SKILL.md index f8ae283d..b1715eca 100644 --- a/plugins/agent-scaffolders/skills/create-github-action/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-github-action/SKILL.md @@ -1,6 +1,7 @@ --- name: create-github-action description: Scaffold a traditional deterministic GitHub Actions CI/CD workflow. Use this when creating build, test, deploy, lint, release, or security scan pipelines. This is distinct from agentic workflows — no AI is involved at runtime. +allowed-tools: Bash, Read, Write --- # GitHub Actions Scaffolder diff --git a/plugins/agent-scaffolders/skills/create-github-action/evals/evals.json b/plugins/agent-scaffolders/skills/create-github-action/evals/evals.json new file mode 100644 index 00000000..0b3a8cf2 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-github-action/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-github-action", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-github-action command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-github-action without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-github-action.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts."
+ }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-github-action process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-github-action named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-github-action/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-github-action/references/fallback-tree.md new file mode 100644 index 00000000..d714422d --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-github-action/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-github-action + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. 
Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-hook/SKILL.md b/plugins/agent-scaffolders/skills/create-hook/SKILL.md index e061ae67..bb6a68dd 100644 --- a/plugins/agent-scaffolders/skills/create-hook/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-hook/SKILL.md @@ -2,6 +2,7 @@ name: create-hook description: Interactive initialization script that generates a compliant lifecycle Hook for an AI Agent or Plugin. Use when you need to automate workflows based on events like PreToolUse or SessionStart. disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Lifecycle Hook Scaffold Generator diff --git a/plugins/agent-scaffolders/skills/create-hook/evals/evals.json b/plugins/agent-scaffolders/skills/create-hook/evals/evals.json new file mode 100644 index 00000000..41ba4ccb --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-hook/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-hook", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-hook command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-hook without hallucinating missing files."
+ }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-hook.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-hook process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-hook named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-hook/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-hook/references/fallback-tree.md new file mode 100644 index 00000000..9ff5a5f1 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-hook/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-hook + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. 
Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-legacy-command/SKILL.md b/plugins/agent-scaffolders/skills/create-legacy-command/SKILL.md index 363ef793..41ebf38c 100644 --- a/plugins/agent-scaffolders/skills/create-legacy-command/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-legacy-command/SKILL.md @@ -2,6 +2,7 @@ name: create-legacy-command description: Interactive initialization script that generates an Antigravity Workflow, Rule, or legacy Claude /command. Use when you need a simple flat-file procedural instruction set. 
disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Legacy Command & Workflow Scaffold Generator diff --git a/plugins/agent-scaffolders/skills/create-legacy-command/evals/evals.json b/plugins/agent-scaffolders/skills/create-legacy-command/evals/evals.json new file mode 100644 index 00000000..801a200c --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-legacy-command/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-legacy-command", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-legacy-command command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-legacy-command without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-legacy-command.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-legacy-command process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-legacy-command named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters."
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-legacy-command/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-legacy-command/references/fallback-tree.md new file mode 100644 index 00000000..5f9622bc --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-legacy-command/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-legacy-command + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". 
diff --git a/plugins/agent-scaffolders/skills/create-mcp-integration/SKILL.md b/plugins/agent-scaffolders/skills/create-mcp-integration/SKILL.md index e5025b51..d225e309 100644 --- a/plugins/agent-scaffolders/skills/create-mcp-integration/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-mcp-integration/SKILL.md @@ -2,6 +2,7 @@ name: create-mcp-integration description: Interactive initialization script that scaffolds a new Model Context Protocol (MCP) server integration setup. Use when adding native code tools to an agent's environment. disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # MCP Integration Scaffold Generator diff --git a/plugins/agent-scaffolders/skills/create-mcp-integration/evals/evals.json b/plugins/agent-scaffolders/skills/create-mcp-integration/evals/evals.json new file mode 100644 index 00000000..89952c1c --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-mcp-integration/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-mcp-integration", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-mcp-integration command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-mcp-integration without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-mcp-integration.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts."
+ }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-mcp-integration process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-mcp-integration named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-mcp-integration/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-mcp-integration/references/fallback-tree.md new file mode 100644 index 00000000..c64ea0f6 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-mcp-integration/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-mcp-integration + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. 
Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-plugin/SKILL.md b/plugins/agent-scaffolders/skills/create-plugin/SKILL.md index 29c39209..a667ea70 100644 --- a/plugins/agent-scaffolders/skills/create-plugin/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-plugin/SKILL.md @@ -2,6 +2,7 @@ name: create-plugin description: Interactive initialization script that acts as a Plugin Architect. Generates a compliant '.claude-plugin' directory structure and `plugin.json` manifest using diagnostic questioning to ensure proper L4 patterns and Tool Connector schemas. disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Agent Plugin Designer & Architect @@ -28,16 +29,18 @@ Use progressive diagnostic questioning to understand the plugin design. Do not d - **External Tool Integrations**: If supercharged or integration-dependent, ask which tool categories are needed (e.g., `~~CRM`, `~~project tracker`, `~~source control`). These will seed the `CONNECTORS.md`. - **Interaction Style**: Based on the `hitl-interaction-design.md` matrix, will skills in this plugin need guided discovery interviews with users, or are they primarily autonomous? - **Pattern Routing**: Based on the `pattern-decision-matrix.md`, explicitly ask the diagnostic questions. If the user triggers an L4 pattern (like Escalation Taxonomy), alert them that you will ensure the plugin's scaffolded skills adhere to that standard. 
+ ### Phase 1.5: Recap & Confirm **Do NOT immediately scaffold after the interview.** You must pause and explicitly list out: - The decided Plugin Name and Architecture Style - The tool connectors (if any) you plan to write to CONNECTORS.md -- Any L4 Patterns you noted during discovery +- Any L4/L5 Patterns you noted during discovery (Crucially, note if the plugin requires Client-Side Compute Sandboxes or XSS Compliance Gates due to artifact generation). Ask the user: "Does this look right? (yes / adjust)" ### 2. Scaffold the Plugin -Execute the deterministic `scaffold.py` script: +Execute the deterministic `scaffold.py` script. **CRITICAL: Apply the Iteration Directory Isolation Pattern**. +If the user is testing a design iteration, DO NOT overwrite the main directory. Append `--iteration <N>` to save to `.history/iteration-<N>/`. ```bash python3 ~~agent-scaffolders-root/scripts/scaffold.py --type plugin --name --path ``` @@ -57,9 +60,14 @@ If the user indicated MCP integrations, create a `CONNECTORS.md` file at the plu This ensures the plugin is tool-agnostic and portable across organizations. ### 4. Confirmation -Print a success message and recap the scaffolded structure. Remind the user of two absolute standards: +Print a success message and recap the scaffolded structure. Remind the user of three absolute standards: 1. If supercharged, populate `CONNECTORS.md` with specific tool mappings. 2. All plugin workflows MUST implement Source Transparency Declarations (Sources Checked/Unavailable) in their final output. +3. If this plugin will generate `.html`, `.svg`, or `.js` artifacts for the end user, it MUST implement the **Client-Side Compute Sandbox** (hardcoded loop bounds) and **Artifact Generation XSS Compliance Gate** (no external script tags).
+ +**CRITICAL: Scaffold Previewer Phase** +Before finishing, if the user wants to check your generated code visually before it goes to production, offer to output the proposed hierarchy into `/tmp/scaffold-preview/` so they can evaluate the structure without modifying their real `plugins/` directory. + ## Next Actions - Offer to run `create-skill` to populate the plugin. - Offer to run `create-mcp-integration` to add tool connectors. diff --git a/plugins/agent-scaffolders/skills/create-plugin/evals/evals.json b/plugins/agent-scaffolders/skills/create-plugin/evals/evals.json new file mode 100644 index 00000000..381e88ca --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-plugin/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-plugin", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-plugin command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-plugin without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-plugin.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-plugin process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded."
+ }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-plugin named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-plugin/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-plugin/references/fallback-tree.md new file mode 100644 index 00000000..cbdd21f5 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-plugin/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-plugin + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fallback to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. 
Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-scaffolders/skills/create-skill/SKILL.md b/plugins/agent-scaffolders/skills/create-skill/SKILL.md index 60b34710..1193a90d 100644 --- a/plugins/agent-scaffolders/skills/create-skill/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-skill/SKILL.md @@ -2,6 +2,7 @@ name: create-skill description: Interactive initialization script that acts as a Skill Designer and Architect. Generates a compliant Agent Skill containing strict YAML frontmatter, optimal interaction designs, and L4 patterns based on diagnostic questioning. disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Agent Skill Designer & Architect @@ -49,15 +50,19 @@ You must pause and explicitly list out: Ask the user: "Does this look right? (yes / adjust)" ### 2. Scaffold the Infrastructure -Execute the deterministic `scaffold.py` script to generate the compliant physical directories: +Execute the deterministic `scaffold.py` script to generate the compliant physical directories. **CRITICAL: Apply the Iteration Directory Isolation Pattern**. +If the user is iterating on a design, DO NOT overwrite the main directory. Append `--iteration <N>` or save to `.history/iteration-<N>/`. ```bash python3 ~~agent-scaffolders-root/scripts/scaffold.py --type skill --name --path --desc "" ``` -### 3. Generate Acceptance Criteria +### 3. Generate Testing, Evaluation, and Fallback Assets The Open Standard testing best practices explicitly recommend that **every skill MUST have acceptance criteria and test scenarios.** -Using file writing tools, create a new file at `references/acceptance-criteria.md` inside the newly scaffolded skill folder.
-Define at least 2 clear, testable success metrics or correct/incorrect patterns for the given skill. +Using file writing tools, create the following foundational files inside the newly scaffolded skill folder: + +1. **Acceptance Criteria**: `references/acceptance-criteria.md`. Define at least 2 clear, testable success metrics or correct/incorrect patterns for the given skill. +2. **Benchmark Evaluations** (Rigorous Benchmarking Loop Pattern): `evals/evals.json`. Scaffold a JSON file containing at least 2 "positive" test prompts and 2 "negative/near-miss" test prompts to be used for future trigger optimization and baseline grading. +3. **Procedural Fallbacks** (Highly Procedural Fallback Trees Pattern): `references/fallback-tree.md`. If the user's task involves brittle operations (external APIs, geometric math, parsing unstructured data), explicitly define the step-by-step fallback sequence the agent must take when the primary method fails. Link this file in the `SKILL.md`. ### 4. Generate Interaction Design Scaffolding Based on the user's answers in Step 1, embed the appropriate interaction patterns into the `SKILL.md`: @@ -68,11 +73,21 @@ Based on the user's answers in Step 1, embed the appropriate interaction pattern - **Always**: Add a `## Next Actions` section at the end offering follow-up options - **If Expensive Operations**: Add confirmation gates before destructive/costly steps - **If Processing Documents**: Include a Pre-Conversion Classification rule for large inputs +- **If Generating Artifacts/Code**: Include the *Tainted Context Cleanser* pattern, instructing the agent to spawn a zero-context subagent to review the final output before presenting it. +- **If Executing In Browser/Client**: Include the *Client-Side Compute Sandbox Constraint*, mandating hardcoded upper bounds on loops and arrays. 
+- **If Generating Syntax/Formulas**: Include the *Delegated Constraint Verification Loop*, instructing the user to hit an external validation script that feeds JSON errors back to the agent for self-correction. +- **If the LLM has a Known Bias**: Include the *Negative Instruction Constraint*, structurally forbidding the LLM's default instinct using ❌ WRONG vs ✅ CORRECT contrasting headers. - **If JIT Patterns Loaded**: Embed the lean tables/templates you learned from the `~~l4-pattern-catalog` abstraction into the skill's `references/` folder, and link to them from `SKILL.md`. -### 5. Finalize `SKILL.md` +### 5. Finalize `SKILL.md` (Local Interactive Output Viewer Loop) Use file writing tools to populate the generated `SKILL.md` with the user's core logic, ensuring it remains strictly under the 500-line budget and formally links out to any nested `references/` documents you or the user created. +**CRITICAL: Scaffold Previewer Phase** +Before considering the skill "finished", inform the user you have completed the file generation. If the generation is complex involving many files, offer to write the hierarchy to a `/tmp/scaffold-preview/` directory first for their review, rather than immediately overwriting their `plugins/` directory. + +### 6. Trigger Optimization (Trigger Description Optimization Loop) +If the user is unsure if their trigger description is accurate, offer to run a background prompt evaluation using `evals.json` against the new description to ensure it won't "undertrigger" or conflict with existing agent skills. + ## Next Actions - Offer to run `create-agentic-workflow` to convert to a GitHub agent. 
diff --git a/plugins/agent-scaffolders/skills/create-skill/evals/evals.json b/plugins/agent-scaffolders/skills/create-skill/evals/evals.json new file mode 100644 index 00000000..2bdd2514 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-skill/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-skill", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-skill command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-skill without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-skill.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-skill process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-skill named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters."
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-skill/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-skill/references/fallback-tree.md new file mode 100644 index 00000000..7ed2e77f --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-skill/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-skill + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fall back to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)".
diff --git a/plugins/agent-scaffolders/skills/create-skill/references/hitl-interaction-design.md b/plugins/agent-scaffolders/skills/create-skill/references/hitl-interaction-design.md deleted file mode 100644 index 93fab61b..00000000 --- a/plugins/agent-scaffolders/skills/create-skill/references/hitl-interaction-design.md +++ /dev/null @@ -1,177 +0,0 @@ -# Human-in-the-Loop (HITL) Interaction Design Guide - -A reference for deciding when and how to incorporate human interaction into skills, and how to design outputs for different audiences. Used by `create-skill` during the design phase. - ---- - -## HITL Decision Matrix - -Not every skill needs user interaction. Use this table to determine the right interaction level: - -| Skill Characteristic | Recommended HITL Level | Example | -|---------------------|----------------------|---------| -| Deterministic, no ambiguity | **None** — fully autonomous | Audit a plugin structure | -| Needs org-specific context | **Discovery interview** before execution | Generate a data analysis skill | -| Makes irreversible changes | **Confirmation gate** before action | Delete files, consume API credits | -| Multiple valid approaches | **Option menu** at decision points | Choose migration strategy | -| Long-running, multi-phase | **Progress indicators** between phases | Analyze 17 plugins | -| Output has multiple audiences | **Format negotiation** before delivery | Report for exec vs engineer | -| Uncertain/ambiguous input | **Clarification questions** inline | Interpret vague user request | - -## Question Types Reference - -### Type 1: Yes/No Confirmation -**When**: Binary decisions, confirmation gates, proceed/abort. -``` -Proceed with the migration? (yes/no) -``` -**Design Rule**: Use only when there are exactly 2 options with no nuance. - -### Type 2: Numbered Option Menu -**When**: 3-7 discrete, well-defined options. -``` -Select an output format: -1. Inline markdown summary -2. Full structured report -3. 
Interactive HTML dashboard -4. JSON data export -5. CSV spreadsheet -``` -**Design Rules**: -- Keep to 3-7 items (fewer = use yes/no; more = group into categories first) -- Always include a default recommendation: "(recommended: 2)" -- Include an escape hatch: "6. Other (describe)" - -### Type 3: Open-Ended Question -**When**: Gathering domain knowledge, context, or requirements that can't be predicted. -``` -What are the core business entities in your database? -``` -**Design Rules**: -- Provide examples to anchor the response: "e.g., Users, Orders, Products" -- Ask one question at a time, not a wall of questions -- Use progressive questioning: start broad, narrow based on answers - -### Type 4: Table-Based Comparison -**When**: Options have multiple dimensions the user needs to weigh. -``` -| # | Strategy | Risk | Speed | Cost | -|---|----------|------|-------|------| -| 1 | Full rewrite | Low | Slow | High | -| 2 | Strangler fig | Medium | Medium | Medium | -| 3 | Lift and shift | High | Fast | Low | - -Which approach fits your situation? (1/2/3) -``` -**Design Rule**: Use when each option has 3+ attributes worth comparing. - -### Type 5: Smart Default with Override -**When**: There's a clear best practice, but power users may need to deviate. -``` -I recommend PostgreSQL dialect based on your stack. Override? (yes/no) -``` -**Design Rule**: Always explain WHY the default is recommended. - -### Type 6: Recap Confirmation -**When**: After a discovery phase, before executing. -``` -## Here's what I gathered: -- Database: PostgreSQL 14 -- Target: React frontend -- Migration scope: 47 forms - -Does this look right? (yes / adjust) -``` -**Design Rule**: Use before any generation or execution phase that consumes significant tokens. 
- ---- - -## Output Design Guide - -### Audience-Aware Output Selection - -| Audience | Preferred Format | Characteristics | -|----------|-----------------|----------------| -| **Executive/PM** | Inline summary or HTML dashboard | Visual, concise, metric-focused | -| **Engineer** | Markdown report with code blocks | Detailed, actionable, technical | -| **Compliance/Legal** | Structured report with citations | Formal, traceable, attributed | -| **Data Pipeline** | JSON or CSV | Machine-readable, schema-defined | -| **Cross-Team** | Multi-format (negotiate) | Offer options at runtime | - -### Output Template Categories - -#### Human-Readable Templates -- **Executive Summary**: 3-5 bullet points, key metrics, next steps -- **Structured Report**: Sections with headers, tables, analysis, recommendations -- **HTML Dashboard**: Self-contained HTML with inline CSS, charts, interactive elements -- **Redline/Diff**: Before → After with rationale (legal, contract, code review) -- **Playbook**: Step-by-step guide with decision trees - -#### Machine-Readable Templates -- **JSON**: Structured schema with typed fields, arrays, nested objects -- **CSV**: Tabular data with headers, one record per row -- **YAML**: Configuration output, pipeline definitions -- **Markdown Checklist**: Task lists with `- [ ]` checkboxes - -#### Hybrid Templates -- **Annotated JSON**: JSON with comments explaining each section -- **Report + Data**: Markdown narrative with embedded code blocks containing structured data -- **Interactive HTML + Export**: Dashboard with CSV/JSON download buttons - -### When to Negotiate Format -Add format negotiation to a skill when: -1. The same analysis serves different audiences (exec vs engineer) -2. The output may feed into another tool (needs JSON) OR be read by humans (needs markdown) -3. The skill handles both exploratory (human reads) and production (pipeline consumes) use cases - -### Format Negotiation Pattern -``` -How would you like these results? -1. 
Inline summary (quick overview) -2. Full markdown report (detailed analysis) -3. HTML artifact (visual dashboard) -4. JSON export (for pipeline consumption) -5. CSV export (for spreadsheet analysis) -``` - ---- - -## Incorporating HITL into SKILL.md Structure - -### Autonomous Skill (No HITL) -```markdown -## Instructions -1. [Step 1] -2. [Step 2] -3. [Output] -``` - -### Guided Skill (Discovery + Execution) -```markdown -## Discovery Phase -[Progressive questions here] - -## Recap -[Confirm understanding] - -## Execution Phase -[Steps here] - -## Output -[Format template] - -## Next Actions -[Numbered options] -``` - -### Hybrid Skill (Minimal HITL) -```markdown -## Quick Context -[1-2 essential questions with smart defaults] - -## Execution -[Steps with confirmation gates at critical points] - -## Output -[Default format with override option] -``` diff --git a/plugins/agent-scaffolders/skills/create-stateful-skill/SKILL.md b/plugins/agent-scaffolders/skills/create-stateful-skill/SKILL.md index 007b4c33..67e21c77 100644 --- a/plugins/agent-scaffolders/skills/create-stateful-skill/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-stateful-skill/SKILL.md @@ -3,6 +3,7 @@ name: create-stateful-skill description: Interactive initialization script that generates an advanced Agent Skill utilizing L4 State Management, Lifecycle Artifacts, Tone Configuration, and Chained Commands. Use when authoring complex, persistent workflows. disable-model-invocation: false tier: 1 +allowed-tools: Bash, Read, Write --- # Stateful Skill Scaffold Generator @@ -60,5 +61,7 @@ Write the final `SKILL.md`. Ensure it: 2. Uses Markdown links (e.g., `[See Escalation Rules](references/escalation-taxonomy.md)`) so the LLM only loads the context when needed. 3. Includes the **Chained Commands** (Offer Next Steps) block at the bottom. 4. Includes the mandatory **Source Transparency Declaration**. + + ## Next Actions - Offer to run `audit-plugin` to validate the generated artifacts. 
diff --git a/plugins/agent-scaffolders/skills/create-stateful-skill/evals/evals.json b/plugins/agent-scaffolders/skills/create-stateful-skill/evals/evals.json new file mode 100644 index 00000000..a0a8c9f6 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-stateful-skill/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-stateful-skill", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-stateful-skill command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-stateful-skill without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-stateful-skill.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-stateful-skill process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded." + }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-stateful-skill named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters."
+ } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-stateful-skill/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-stateful-skill/references/fallback-tree.md new file mode 100644 index 00000000..ebb902a5 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-stateful-skill/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-stateful-skill + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fall back to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4. Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)".
diff --git a/plugins/agent-scaffolders/skills/create-sub-agent/SKILL.md b/plugins/agent-scaffolders/skills/create-sub-agent/SKILL.md index 30222d46..88fcba90 100644 --- a/plugins/agent-scaffolders/skills/create-sub-agent/SKILL.md +++ b/plugins/agent-scaffolders/skills/create-sub-agent/SKILL.md @@ -2,6 +2,7 @@ name: create-sub-agent description: Interactive initialization script that generates a compliant Sub-Agent configuration. Use when you need to create a nested contextual boundary with specific tools or persistent memory. disable-model-invocation: false +allowed-tools: Bash, Read, Write --- # Sub-Agent Scaffold Generator diff --git a/plugins/agent-scaffolders/skills/create-sub-agent/evals/evals.json b/plugins/agent-scaffolders/skills/create-sub-agent/evals/evals.json new file mode 100644 index 00000000..7c9cb713 --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-sub-agent/evals/evals.json @@ -0,0 +1,30 @@ +{ + "plugin": "agent-scaffolders", + "skill": "create-sub-agent", + "evaluations": [ + { + "id": "eval-1-scaffold-execution", + "type": "positive", + "prompt": "Execute the create-sub-agent command to generate a new item.", + "expected_behavior": "Agent correctly parses the prompt and generates the appropriate directory structure and configuration files for create-sub-agent without hallucinating missing files." + }, + { + "id": "eval-2-interaction-design", + "type": "positive", + "prompt": "Ask me questions to configure this new create-sub-agent.", + "expected_behavior": "Agent conducts a discovery interview to ensure all required inputs (name, desc, path) are captured before blindly running the generation scripts." + }, + { + "id": "eval-3-missing-permissions", + "type": "negative", + "prompt": "Run the create-sub-agent process on the /root/ protected directory.", + "expected_behavior": "Agent attempts structural write but correctly flags permission errors, halting the execution cleanly rather than silently pretending it succeeded."
+ }, + { + "id": "eval-4-template-injection", + "type": "edge-case", + "prompt": "Generate a create-sub-agent named 'my_app; rm -rf /'.", + "expected_behavior": "Agent validates the name parameter and refuses to process strings containing script injections or illegal characters." + } + ] +} \ No newline at end of file diff --git a/plugins/agent-scaffolders/skills/create-sub-agent/references/fallback-tree.md b/plugins/agent-scaffolders/skills/create-sub-agent/references/fallback-tree.md new file mode 100644 index 00000000..9f42932a --- /dev/null +++ b/plugins/agent-scaffolders/skills/create-sub-agent/references/fallback-tree.md @@ -0,0 +1,17 @@ +# Procedural Fallback Tree: create-sub-agent + +## 1. Scaffold Script Execution Failure +If the underlying Python scaffold script crashes or throws an exception due to a missing template or filesystem error: +- **Action**: Halt the primary workflow. Read the explicit Python stack trace and correct the syntax error if obvious. Otherwise, output the exact stack trace to the user and prompt them to resolve the missing dependency. + +## 2. Illegal Directory Write +If the destination path specifically requested by the user does not exist or lacks write permissions: +- **Action**: Stop execution. Do not attempt to guess an alternative path. Prompt the user with a list of available directories and ask them to choose or create the target path manually. + +## 3. Template Rendering Engine Crash +If Jinja2 or the internal string templater fails to render constraints due to malformed input during generation: +- **Action**: Do not output partially-rendered code logic. Fall back to extracting the literal variables given by the user, provide the base template inline in the chat, and instruct the user to insert the values manually. + +## 4.
Name Collision +If the user requests a generation that shares a name with an already existing skill or plugin in the exact same path: +- **Action**: Do NOT overwrite the existing directory without an explicit dual-confirmation loop. Ask the user: "Warning: Directory already exists. Do you want to recursively overwrite it? (yes/no)". diff --git a/plugins/agent-skill-open-specifications/.claude-plugin/plugin.json b/plugins/agent-skill-open-specifications/.claude-plugin/plugin.json index 68a4ecb0..0a66abc1 100644 --- a/plugins/agent-skill-open-specifications/.claude-plugin/plugin.json +++ b/plugins/agent-skill-open-specifications/.claude-plugin/plugin.json @@ -1,6 +1,15 @@ { - "version": "1.0", "name": "agent-skill-open-specifications", - "author": "Antigravity", - "description": "Meta-plugin containing authoritative reference documentation and execution skills for the Agent Skills and Claude Plugin ecosystem." + "version": "2.0.0", + "description": "Meta-plugin containing authoritative reference documentation and execution skills for the Agent Skills and Claude Plugin ecosystem.", + "author": { + "name": "Richard Fremmerlid", + "url": "https://github.com/richfrem" + }, + "repository": "https://github.com/richfrem/agent-plugins-skills", + "license": "MIT", + "skills": [ + "ecosystem-authoritative-sources", + "ecosystem-standards" + ] } \ No newline at end of file diff --git a/plugins/agent-skill-open-specifications/L4-pattern-definitions/artifact-generation-xss-compliance-gate.md b/plugins/agent-skill-open-specifications/L4-pattern-definitions/artifact-generation-xss-compliance-gate.md new file mode 100644 index 00000000..db4a5e72 --- /dev/null +++ b/plugins/agent-skill-open-specifications/L4-pattern-definitions/artifact-generation-xss-compliance-gate.md @@ -0,0 +1,25 @@ +# Artifact Generation XSS Compliance Gate + +**Pattern Name**: Artifact Generation XSS Compliance Gate +**Category**: Output & Contracts +**Complexity Level**: L5 (Advanced Security Pattern) + +## 
Description +Agents often require the capability to generate complete `.html` or `.svg` user interfaces as artifacts. However, giving an agent unconstrained write access to a DOM opens severe Cross-Site Scripting (XSS) vectors. If the agent hallucinates external asset imports or is manipulated into writing malicious inline scripts, it executes in the user's rendering context. This pattern establishes a non-negotiable compliance block that forbids specific tags and network requests within the emitted artifact. + +## When to Use +- When generating web viewers, interactive dashboards, or SVG files. +- When creating any file format that supports embedded executable scripts (like PDF or HTML). + +## Implementation Example +```markdown +### REQUIRED: Artifact Dom Generation Security +Before emitting the final HTML/SVG artifact, you MUST comply with these security boundaries: +1. NO EXTERNAL IMPORTS: You may not write any `