diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 440cd5c..9629ecc 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -170,6 +170,24 @@ jobs: chmod +x ci/e2e-onboarding-flows.sh ci/e2e-onboarding-flows.sh + e2e-adapter-freshness: + name: E2E Adapter Schema + Freshness (8 cells) + runs-on: ubuntu-latest + timeout-minutes: 5 + # PR 6 of the 2026-05-10 architecture audit. Locks bin/check-adapters.sh + # end-to-end against tmp adapters/ fixtures: malformed JSON fails, + # README-listed adapter missing fails, stale beyond 60 days fails + # for README-listed hosts, NANOSTACK_ALLOW_STALE_ADAPTERS=1 + # downgrades to warn, --json output parses. + steps: + - uses: actions/checkout@v4 + - name: jq is present + run: jq --version + - name: Run adapter freshness E2E + run: | + chmod +x ci/e2e-adapter-freshness.sh + ci/e2e-adapter-freshness.sh + e2e-custom-routing: name: E2E Custom Routing Contract (8 cells) runs-on: ubuntu-latest diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 13b02c7..41dd617 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3366,6 +3366,122 @@ jobs: fi echo "OK: guard concurrency tier uses the phase registry." + ai-facing-docs-consistency: + name: AI-facing docs agree on adapters, sprint, guard, privacy + runs-on: ubuntu-latest + # PR 6 of the 2026-05-10 architecture audit. llms.txt, AGENTS.md, + # bin/about.sh, guard/SKILL.md, and the public READMEs must not + # ship stale overclaims and must agree on the verified adapter + # set, the default sprint order, the layered guard structure, + # and the privacy posture. Lint catches drift; the architect + # round audited the surface state once. 
+ permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - name: No forbidden overclaims in agent-facing docs + run: | + set -e + fail=0 + patterns='any AI coding agent|any agent that reads SKILL|zero dependencies|full engineering team|no telemetry, no remote calls|three-tier guard|three-tier safety|three-tier permission system' + for f in llms.txt AGENTS.md bin/about.sh guard/SKILL.md README.md README.es.md; do + [ -f "$f" ] || continue + if grep -nEi "$patterns" "$f"; then + echo "FAIL: $f contains a forbidden overclaim above." + fail=1 + fi + done + exit $fail + - name: Verified adapters set is the same across surfaces + run: | + set -e + fail=0 + # Source of truth: filenames under adapters/. + adapters_truth=$(find adapters -maxdepth 1 -name "*.json" -type f \ + | sed 's|.*/||; s|\.json$||' | sort | tr '\n' ' ') + # Every public surface that names verified adapters must + # mention each one shipped under adapters/. Codex flagged + # the partial coverage on the PR 6 sixth review pass: the + # READMEs are the load-bearing public claim, and they were + # not in this loop. + for name in $adapters_truth; do + [ -z "$name" ] && continue + for f in AGENTS.md llms.txt README.md README.es.md; do + if ! grep -qi -- "$name" "$f"; then + echo "FAIL: $f does not mention adapter '$name'" + fail=1 + fi + done + done + # Reverse direction: AGENTS.md and llms.txt must NOT + # advertise a verified adapter that adapters/ no longer + # ships. Codex flagged the one-directional check on the + # PR 6 second review pass: a removed adapter could stay + # advertised in the agent-facing docs forever. 
+ known_adapters="claude cursor codex opencode gemini" + for candidate in $known_adapters; do + case " $adapters_truth " in + *" $candidate "*) ;; + *) + for f in AGENTS.md llms.txt README.md README.es.md; do + # Match three shapes: + # - inline backticked -> `cursor` + # - "verified adapter" copy -> "verified adapter cursor" + # - bullet entry under a "Verified adapters" header -> "- Cursor" + # Codex flagged the bare-bullet miss on the PR 6 + # fifth review pass and the README omission on the + # sixth review pass. + if grep -qiE "(\`${candidate}\`|verified adapter[^.]*${candidate}|^[[:space:]]*[-*][[:space:]]+${candidate}\b)" "$f"; then + echo "FAIL: $f advertises adapter '$candidate' but adapters/${candidate}.json is missing." + fail=1 + fi + done + ;; + esac + done + exit $fail + - name: Default sprint order matches across surfaces + run: | + set -e + fail=0 + # The canonical order is documented as + # /think -> /nano -> build -> /review -> /security -> /qa -> /ship. + # Spot-check that bin/about.sh keeps the same arrows shape. + if ! grep -qE '/think.*/nano.*build.*/review.*/security.*/qa.*/ship' bin/about.sh; then + echo "FAIL: bin/about.sh sprint order does not match the canonical default." + fail=1 + fi + exit $fail + - name: Guard doc references rules.json instead of a hand-maintained count + run: | + set -e + # Hand-maintained "28 block rules and 9 warn rules" used to + # live in guard/SKILL.md. PR 6 of the audit removed that + # hardcoding. If a future commit adds back any specific + # block-rule count, the lint fails so the count never drifts + # from the JSON. (We allow generic "rule count" mentions + # that refer to guard/rules.json explicitly.) + if grep -nE '[0-9]+ (block|warn) rules' guard/SKILL.md README.md README.es.md AGENTS.md llms.txt bin/about.sh 2>/dev/null; then + echo "FAIL: a numeric block/warn rule count was found in a public doc." + echo " Replace with guard/rules.json as the source of truth." 
+ exit 1 + fi + echo "OK: no hardcoded rule counts." + + adapter-freshness: + name: Adapter schema + freshness + runs-on: ubuntu-latest + # PR 6 of the 2026-05-10 architecture audit. Runs bin/check-adapters.sh + # so a stale or malformed adapter cannot ship to main. + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - name: Run check-adapters.sh + run: | + chmod +x bin/check-adapters.sh + bin/check-adapters.sh + custom-routing-contract: name: Custom routing contract wired into resolve.sh runs-on: ubuntu-latest diff --git a/AGENTS.md b/AGENTS.md index 447ca03..75193da 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,27 +1,28 @@ # Nanostack: Agent Discovery -This file lists all available skills for all supported agents (Claude Code, Cursor, Codex, OpenCode, Gemini CLI). -Each skill folder contains a `SKILL.md` for agent discovery and an `agents/openai.yaml` for OpenAI-compatible agents. +This file lists the skills shipped by Nanostack for the verified adapters: Claude Code, Cursor, OpenAI Codex, OpenCode, and Gemini CLI. Each skill folder contains a `SKILL.md` for adapter discovery and an `agents/openai.yaml` for OpenAI-compatible agents. Adapter capability evidence lives in `adapters/.json`; treat the JSON as the single source of truth for what a given host actually enforces (hook execution, write guard, phase gate). -## Available Skills +## Available skills | Skill | Directory | Description | |-------|-----------|-------------| -| think | `think/` | Strategic product thinking. Three modes (Founder/Startup/Builder) with calibrated intensity. YC-grade forcing questions, CEO cognitive patterns, manual delivery test. | -| nano | `plan/` | Implementation planning. Scope assessment, step-by-step plans with verification, product standards. | -| review | `review/` | Two-pass code review. Structural then adversarial. Scope drift detection against plan. Conflict detection with /security. | -| qa | `qa/` | Quality assurance. 
Browser, API, CLI and debug testing with Playwright. WTF heuristic. | -| security | `security/` | Security audit. OWASP Top 10, STRIDE, dependency scanning. Cross-references /review for conflicts. Graded report (A-F). | -| ship | `ship/` | Shipping pipeline. PR creation, CI monitoring, post-merge verification. Generates sprint journal on success. | -| guard | `guard/` | Three-tier safety. Allowlist, in-project bypass, 28 block rules with safer alternatives. Configurable in guard/rules.json. | +| think | `think/` | Strategic product thinking with calibrated intensity per archetype. Saves a structured artifact (value proposition, scope mode, target user, narrowest wedge, key risk, premise validation). | +| nano | `plan/` | Implementation planning. Planned files, plan approval, scope assessment, product standards. | +| review | `review/` | Two-pass code review (structural + adversarial). Scope drift detection against /nano. Conflict precedence with /security. | +| qa | `qa/` | Browser, API, CLI, or debug testing. WTF heuristic. | +| security | `security/` | OWASP Top 10 + STRIDE audit. Cross-references /review for conflicts. | +| ship | `ship/` | Pre-flight, PR creation, CI monitoring, post-deploy verification. Generates the sprint journal on success. | +| guard | `guard/` | Block and warn rules on Bash + Write/Edit. Phase concurrency, sprint phase gate, and budget gate run inside the same pipeline. Rule counts live in `guard/rules.json`. | | conductor | `conductor/` | Multi-agent sprint orchestrator. Parallel sessions via claim/complete protocol with atomic file locking. | -## Know-how Pipeline +## Custom workflow stacks -Skills automatically save artifacts to `.nanostack/` and cross-reference each other. `/ship` generates a sprint journal. The vault at `.nanostack/know-how/` works as an Obsidian vault. Run `bin/discard-sprint.sh` to clean up bad sessions. 
+Custom stacks declare their own phases in `.nanostack/config.json` (`custom_phases` + `phase_graph`) and live under `/skills//`. They get the same lifecycle support as the built-in sprint (graph-aware progression, concurrency enforcement, artifact trust, schema validation, routing intent through `phase_context`). The contract is in `reference/custom-stack-contract.md`; `examples/custom-stack-template/compliance-release/` is a worked example. -## Usage +## Know-how pipeline + +Skills automatically save artifacts to `.nanostack/`. Downstream skills read upstream artifacts through `bin/resolve.sh`, which honors the artifact-trust contract (PR 2) and the routing contract for custom skills (PR 5). `/ship` generates a sprint journal. `bin/discard-sprint.sh` cleans up bad sessions. -Each skill's `SKILL.md` contains the full instructions. Read it and follow the process described. +## Usage -Supporting files (templates, references, checklists, scripts) are in subdirectories. Read them when referenced by the SKILL.md. +Each skill's `SKILL.md` contains the full instructions. Read it and follow the process described. Supporting files (templates, references, checklists, scripts) live in subdirectories and are referenced from the SKILL.md when needed. diff --git a/README.es.md b/README.es.md index 5cb2f3f..5f2b5b8 100644 --- a/README.es.md +++ b/README.es.md @@ -201,14 +201,14 @@ Los agentes cometen errores. Corren `rm -rf` cuando querían `rm -r`, hacen forc Cada comando de Bash pasa por estos seis tiers, en este orden: -1. **Block rules**: las reglas de bloqueo corren primero. 35 reglas cubren borrado masivo (`rm -rf .`, `find . -delete`), destrucción de historia (`git push --force`), lecturas de secretos (`.env`, `*.pem`), drops de DB, deploys a producción y ejecución remota (`curl | sh`). Una coincidencia bloquea aunque el binario esté en el allowlist de abajo. +1. **Block rules**: las reglas de bloqueo corren primero. Cubren borrado masivo (`rm -rf .`, `find . 
-delete`), destrucción de historia (`git push --force`), lecturas de secretos (`.env`, `*.pem`), drops de DB, deploys a producción y ejecución remota (`curl | sh`). Una coincidencia bloquea aunque el binario esté en el allowlist de abajo. La fuente de verdad es [`guard/rules.json`](guard/rules.json); para ver el conteo actual: `jq '[.tiers.block.rules[].id] | length' guard/rules.json`. 2. **Allowlist**: para comandos que pasaron las block rules, los allowlisteados (`git status`, `ls`, `cat`, `jq`, etc.) saltan el resto. 3. **In-project**: operaciones que solo tocan archivos del repo actual pasan. El control de versiones es la red de seguridad. 4. **Concurrencia por fase**: durante fases read-only (review, qa, security), las operaciones de escritura quedan bloqueadas para evitar race conditions. 5. **Phase gate**: cuando hay un sprint activo, `git commit` y `git push` quedan bloqueados hasta que existan artifacts frescos de review, security y qa. 6. **Budget gate**: cuando el sprint tiene un presupuesto y se gastó 95%+, todos los comandos no-allowlist quedan bloqueados. -Plus 9 reglas de advertencia para operaciones que requieren atención sin llegar a bloqueo. +Más un tier de reglas de advertencia (`warn`) para operaciones que requieren atención sin llegar a bloqueo. Las definiciones también viven en `guard/rules.json`. Las herramientas Write, Edit y MultiEdit pasan por su propio hook (`guard/bin/check-write.sh`) que niega rutas protegidas: archivos de secretos (`.env` y variantes, `*.pem`, `*.key`, llaves SSH) y directorios de sistema o usuario-secreto (`/etc`, `/var`, `/usr/bin`, `~/.ssh`, `~/.aws`, `~/.kube`). Los symlinks se resuelven antes de matchear, así que un `mylink/config -> ~/.ssh/config` se trata como destino resuelto. diff --git a/README.md b/README.md index 3b2571b..209c292 100644 --- a/README.md +++ b/README.md @@ -443,7 +443,7 @@ AI agents make mistakes.
They run `rm -rf` when they mean `rm -r`, force push to Inspired by [Claude Code auto mode](https://www.anthropic.com/engineering/claude-code-auto-mode), guard evaluates every Bash command through six tiers in this order: -**Tier 1: Block rules.** Patterns for mass deletion, history destruction, database drops, production deploys, remote code execution, secret reads, security degradation and safety bypasses run first. A match exits 1 immediately, even if the command's binary is on the allowlist below. This ordering closes the bypass class where `find . -delete` or `cat .env` slipped past Tier 2 because `find` and `cat` were on the allowlist. 35 block rules total. +**Tier 1: Block rules.** Patterns for mass deletion, history destruction, database drops, production deploys, remote code execution, secret reads, security degradation and safety bypasses run first. A match exits 1 immediately, even if the command's binary is on the allowlist below. This ordering closes the bypass class where `find . -delete` or `cat .env` slipped past Tier 2 because `find` and `cat` were on the allowlist. Block rule definitions live in [`guard/rules.json`](guard/rules.json); query the live count with `jq '[.tiers.block.rules[].id] | length' guard/rules.json`. **Tier 2: Allowlist.** After block rules clear, commands like `git status`, `ls`, `cat`, `jq` skip the remaining checks. They are read-only or otherwise side-effect-free for safe arguments. @@ -455,7 +455,7 @@ Inspired by [Claude Code auto mode](https://www.anthropic.com/engineering/claude **Tier 6: Budget gate.** When a sprint budget is set and 95%+ spent, all non-allowlisted commands are blocked. The agent can still run safe commands (`ls`, `git status`, `cat`) to save work, but cannot execute builds, tests, or deploys. Bypass with `NANOSTACK_SKIP_BUDGET=1`. -Plus a Tier 7 of warn rules for operations that need attention but not blocking. 9 warn rules total. 
+Plus a Tier 7 of warn rules for operations that need attention but not blocking. Warn rule definitions also live in `guard/rules.json`. ### Write and Edit are hooked too diff --git a/bin/about.sh b/bin/about.sh index 3198d03..ddc03a2 100755 --- a/bin/about.sh +++ b/bin/about.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash # about.sh — Generate compact self-description for agents # Writes .nanostack/ABOUT.md with skills, flow, key commands. -# Any agent (Cursor, Codex, Claude Code) can read this to understand nanostack. +# Verified adapters: Claude Code, Cursor, OpenAI Codex, OpenCode, Gemini CLI. +# Adapter capabilities live in adapters/.json. # # Usage: about.sh Generate/update ABOUT.md # about.sh --print Print to stdout instead of file @@ -21,9 +22,21 @@ SESSIONS=$(find "$NANOSTACK_STORE/sessions" -name "*.json" -type f 2>/dev/null | HAS_CONFIG="no" [ -f "$NANOSTACK_STORE/config.json" ] && HAS_CONFIG="yes" +# Adapter list: read names from adapters/*.json so this stays in sync +# with the single source of truth. Falls back to the canonical five if +# the adapters directory is missing. `paste -sd ', '` alternates the +# delimiter byte-by-byte on macOS, so we use awk for a clean +# comma-space join. +ADAPTER_LIST="" +if [ -d "$NANOSTACK_ROOT/adapters" ]; then + ADAPTER_LIST=$(find "$NANOSTACK_ROOT/adapters" -maxdepth 1 -name "*.json" -type f 2>/dev/null \ + | sed 's|.*/||; s|\.json$||' | sort | awk 'NR>1{printf ", "} {printf "%s",$0} END{print ""}') +fi +[ -z "$ADAPTER_LIST" ] && ADAPTER_LIST="claude, codex, cursor, gemini, opencode" + DOC="# Nanostack -Sprint quality framework. Turns your AI agent into an engineering team. +Local workflow framework for AI coding agents. The built-in sprint plus a framework for declaring your own custom workflow stacks. Verified adapters: $ADAPTER_LIST. ## Flow @@ -56,14 +69,20 @@ Sprint quality framework. Turns your AI agent into an engineering team. | bin/doctor.sh | Know-how health check. 
| | bin/capture-failure.sh | Log what went wrong (no /compound needed). | +## Custom workflow stacks + +Declare your own phases in \`.nanostack/config.json\` (\`custom_phases\` + \`phase_graph\`) and put the skill under \`/skills//\`. Conductor scheduling, guard concurrency, the artifact contract, session lifecycle, next-step output, and the resolver all consume the same phase registry. See \`reference/custom-stack-contract.md\` and \`examples/custom-stack-template/compliance-release/\`. + ## State All data in \`.nanostack/\`: -- Artifacts: \`.nanostack//.json\` +- Artifacts: \`.nanostack//.json\` with SHA-256 integrity field. - Solutions: \`.nanostack/know-how/solutions/{bug,pattern,decision}/\` - Briefs: \`.nanostack/know-how/briefs/\` - Audit log: \`.nanostack/audit.log\` +There is no Nanostack cloud. Telemetry is opt-in and documented in \`reference/telemetry.md\`. + ## This Project - Solutions: $SOLUTIONS diff --git a/bin/check-adapters.sh b/bin/check-adapters.sh new file mode 100755 index 0000000..368d2eb --- /dev/null +++ b/bin/check-adapters.sh @@ -0,0 +1,390 @@ +#!/usr/bin/env bash +# check-adapters.sh — Validate adapters/.json files. +# +# PR 6 of the 2026-05-10 architecture audit. Adapters declare what a +# given host (claude, codex, cursor, opencode, gemini, ...) actually +# enforces for the Bash guard, write guard, and phase gate. This +# script validates schema + capability enum membership + last_verified +# freshness and surfaces drift before it reaches a release. +# +# Usage: +# bin/check-adapters.sh Validate every adapters/*.json. +# bin/check-adapters.sh Validate one adapter. +# bin/check-adapters.sh --json Machine-readable summary. +# +# Freshness policy: +# - warn after 30 days +# - fail after 60 days for README-listed adapters +# - manual override: NANOSTACK_ALLOW_STALE_ADAPTERS=1 downgrades the +# fail to a warning. Not for CI; intended for `bin/check-adapters.sh` +# when a maintainer is explicitly re-running on an old branch. 
+# +# Exit code: +# 0 all adapters validate within the freshness window +# 1 any adapter is malformed, missing a required key, has a value +# outside the enum, has unparseable last_verified, or is stale +# beyond the README-fail threshold +set -e +set -u + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +NANOSTACK_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +ADAPTER_DIR="$NANOSTACK_ROOT/adapters" + +WARN_DAYS=30 +FAIL_DAYS=60 + +JSON_OUT=false +FILTER="" +for arg in "$@"; do + case "$arg" in + --json) JSON_OUT=true ;; + -h|--help) + sed -n '/^# /,/^$/p' "$0" | sed 's/^# //' + exit 0 + ;; + *) FILTER="$arg" ;; + esac +done + +if ! command -v jq >/dev/null 2>&1; then + echo "ERROR: jq is required" >&2 + exit 1 +fi + +if [ ! -d "$ADAPTER_DIR" ]; then + echo "ERROR: $ADAPTER_DIR does not exist" >&2 + exit 1 +fi + +# Known hosts and accepted capability enum values. The capability +# values come from reference/host-adapter-schema.md; bash/write/phase +# all share the same enum. Codex caught the earlier drift on the +# PR 6 first review pass — the docs accepted `detectable`, +# `hooked`, and `host_dependent` while the script was rejecting +# them. +KNOWN_HOSTS="claude cursor codex opencode gemini" +ENFORCEMENT_ENUM="unsupported instructions_only detectable hooked enforced host_dependent" +DISCOVERY_ENUM="native rules_file extension skill_folder instructions_only unsupported unknown host_dependent" +VERIFICATION_METHOD_ENUM="ci manual unknown" +# Supported schema versions. Bump here when the schema doc adds a new +# version and update downstream consumers in the same commit so an +# adapter cannot ship a future-incompatible shape silently. +SCHEMA_VERSION_ENUM="1" + +# Adapter names listed in the README. Adapters in this list get the +# strict fail-after-60 policy; an adapter not listed in the README is +# advisory only. 
Path is anchored at $NANOSTACK_ROOT so the lookup
+# does not depend on the caller's cwd (Codex flagged this on the
+# PR 6 third review pass — a script invoked from outside the repo
+# was producing an empty list and silently downgrading fails to
+# warns).
+README_LISTED=$(grep -oE '`(claude|cursor|codex|opencode|gemini)`' "$NANOSTACK_ROOT/README.md" 2>/dev/null \
+  | tr -d '`' | sort -u | tr '\n' ' ')
+
+# Single reference timestamp (UTC epoch seconds) for every age
+# computation in this run.
+NOW_EPOCH=$(date -u +%s)
+
+# in_enum VALUE "SPACE SEPARATED LIST"
+# Returns 0 when VALUE is exactly one member of the list, 1 otherwise.
+# Empty values and values containing whitespace are rejected up front:
+# the membership test is a substring match on " list ", so a value
+# such as "hooked enforced" would otherwise match as if it were one
+# member.
+in_enum() {
+  local val="$1" enum="$2"
+  case "$val" in
+    ''|*[[:space:]]*) return 1 ;;
+  esac
+  case " $enum " in
+    *" $val "*) return 0 ;;
+  esac
+  return 1
+}
+
+# parse_iso_date YYYY-MM-DD
+# Prints the UTC epoch seconds for midnight of the given date. Prints
+# nothing when the value is not strictly YYYY-MM-DD or the date tool
+# rejects it; callers treat empty output as "unparseable".
+parse_iso_date() {
+  local d="$1"
+  # Strict ISO YYYY-MM-DD only. GNU `date -d` accepts non-ISO values
+  # like "yesterday" or "04/25/2026", which would let a malformed
+  # last_verified slip through the freshness gate on the Ubuntu
+  # CI runner. The shape check rejects those before parsing.
+  # Codex caught the permissive parse on the PR 6 seventh review pass.
+  case "$d" in
+    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]) ;;
+    *) return 0 ;;
+  esac
+  if command -v gdate >/dev/null 2>&1; then
+    gdate -u -d "$d" +%s 2>/dev/null
+  else
+    # BSD/macOS date fills any field missing from the -f format with
+    # the current clock value, so a date-only format yields "that day
+    # at the current time of day". For today's date that can land
+    # after NOW_EPOCH and be misreported as a future date. Pin the
+    # time to 00:00:00 so both branches return midnight UTC, matching
+    # GNU `date -d`.
+    date -u -j -f "%Y-%m-%d %H:%M:%S" "$d 00:00:00" +%s 2>/dev/null \
+      || date -u -d "$d" +%s 2>/dev/null
+  fi
+}
+
+# Run-wide accumulators, updated by record_result and the cross-checks.
+FAIL=0
+WARN=0
+RESULTS_JSON="[]"
+RESULTS_TEXT=""
+FILTER_MATCHED=0
+
+# check_adapter FILE
+# Validates one adapters/*.json file (shape, required keys, enum
+# membership, host/filename agreement, last_verified freshness) and
+# records exactly one result through record_result. Always returns 0;
+# failures are carried in the FAIL/WARN counters. Files whose basename
+# does not match a non-empty $FILTER are skipped silently.
+check_adapter() {
+  local file="$1"
+  local name
+  name=$(basename "$file" .json)
+
+  if [ -n "$FILTER" ] && [ "$name" != "$FILTER" ]; then
+    return 0
+  fi
+  [ -n "$FILTER" ] && FILTER_MATCHED=1
+
+  local host last_verified bash_guard write_guard phase_gate discovery
+  # Loop variables are declared local so they do not leak into the
+  # script's global scope.
+  local key str_key field val
+  if ! jq -e '.' "$file" >/dev/null 2>&1; then
+    record_result "$name" "fail" "invalid JSON" "" 0
+    return 0
+  fi
+  # The root must be an object. A valid-JSON array (`[]`) or scalar
+  # used to pass this gate and crash the next jq read under set -e.
+  # Codex flagged the type hole on the PR 6 sixth review pass.
+  if ! jq -e 'type == "object"' "$file" >/dev/null 2>&1; then
+    record_result "$name" "fail" "root is not a JSON object" "" 0
+    return 0
+  fi
+
+  host=$(jq -r '.host // ""' "$file")
+  last_verified=$(jq -r '.last_verified // ""' "$file")
+  bash_guard=$(jq -r '.bash_guard // ""' "$file")
+  write_guard=$(jq -r '.write_guard // ""' "$file")
+  phase_gate=$(jq -r '.phase_gate // ""' "$file")
+  discovery=$(jq -r '.skill_discovery // ""' "$file")
+  # `.verification.method` is read inside the type-guarded block below
+  # so a non-object verification (e.g. a string) does not crash jq
+  # before record_result lands. Codex flagged the unguarded access on
+  # the PR 6 fifth review pass.
+
+  # Accumulates "; "-separated problems; non-empty means the adapter fails.
+  local errors=""
+  # Full required-field list from reference/host-adapter-schema.md.
+  # Codex flagged the truncated check on the PR 6 first review pass:
+  # the previous list omitted schema_version, verification,
+  # install_target, doctor_checks so an adapter could ship with no
+  # verification evidence and still pass.
+  for key in host schema_version last_verified verification skill_discovery \
+    bash_guard write_guard phase_gate install_target doctor_checks; do
+    if ! jq -e --arg k "$key" 'has($k)' "$file" >/dev/null 2>&1; then
+      errors="${errors:+$errors; }missing $key"
+    fi
+  done
+  # Scalar required fields must be strings. A wrong scalar type like
+  # install_target: 123 used to pass because only key presence was
+  # checked. Codex flagged this on the PR 6 sixth review pass.
+  for str_key in host schema_version last_verified skill_discovery \
+    bash_guard write_guard phase_gate install_target; do
+    if jq -e --arg k "$str_key" 'has($k)' "$file" >/dev/null 2>&1; then
+      if ! jq -e --arg k "$str_key" '.[$k] | type == "string"' "$file" >/dev/null 2>&1; then
+        errors="${errors:+$errors; }$str_key is not a string"
+      fi
+    fi
+  done
+
+  # schema_version must match a known value. Bumping the schema means
+  # adding to SCHEMA_VERSION_ENUM in the same commit that updates
+  # downstream consumers, so an adapter cannot ship a future-
+  # incompatible shape silently. Codex flagged the missing version
+  # check on the PR 6 fourth review pass.
+  local schema_v
+  schema_v=$(jq -r '.schema_version // ""' "$file")
+  if [ -z "$schema_v" ]; then
+    errors="${errors:+$errors; }schema_version is empty"
+  elif ! in_enum "$schema_v" "$SCHEMA_VERSION_ENUM"; then
+    errors="${errors:+$errors; }schema_version=$schema_v not in supported set ($SCHEMA_VERSION_ENUM)"
+  fi
+
+  # verification must be an object with method + evidence. The method
+  # read is wrapped with `?` and a type guard so a malformed
+  # verification (e.g. a string) under set -e cannot make this stage
+  # exit before record_result lands. Codex caught the malformed-input
+  # crash on the PR 6 fifth review pass.
+  if jq -e '.verification | type == "object"' "$file" >/dev/null 2>&1; then
+    local v_method
+    v_method=$(jq -r '.verification.method? // ""' "$file" 2>/dev/null)
+    if [ -z "$v_method" ]; then
+      errors="${errors:+$errors; }verification.method is empty"
+    elif ! in_enum "$v_method" "$VERIFICATION_METHOD_ENUM"; then
+      errors="${errors:+$errors; }verification.method=$v_method not in enum ($VERIFICATION_METHOD_ENUM)"
+    fi
+    if ! jq -e '.verification.evidence | type == "string" and length > 0' "$file" >/dev/null 2>&1; then
+      errors="${errors:+$errors; }verification.evidence is empty or wrong type"
+    fi
+  elif jq -e 'has("verification")' "$file" >/dev/null 2>&1; then
+    # Field exists but is not an object — recorded as a typed failure
+    # rather than letting the script exit with a jq error.
+    errors="${errors:+$errors; }verification is not an object"
+  fi
+
+  # doctor_checks must be a non-empty array of strings. The schema in
+  # reference/host-adapter-schema.md says `string[]`; non-string
+  # entries pass to downstream doctor/setup code as check names so a
+  # numeric or object entry would break runtime lookups. Codex flagged
+  # the missing element-type check on the PR 6 third review pass.
+  if jq -e 'has("doctor_checks") and (.doctor_checks | type == "array")' "$file" >/dev/null 2>&1; then
+    if ! jq -e '.doctor_checks | length > 0' "$file" >/dev/null 2>&1; then
+      errors="${errors:+$errors; }doctor_checks is empty"
+    elif ! jq -e '.doctor_checks | all(type == "string" and length > 0)' "$file" >/dev/null 2>&1; then
+      errors="${errors:+$errors; }doctor_checks must be a non-empty array of strings"
+    fi
+  elif jq -e 'has("doctor_checks")' "$file" >/dev/null 2>&1; then
+    errors="${errors:+$errors; }doctor_checks is not an array"
+  fi
+
+  if ! in_enum "$host" "$KNOWN_HOSTS"; then
+    errors="${errors:+$errors; }host=$host not in known set ($KNOWN_HOSTS)"
+  fi
+  # The schema (reference/host-adapter-schema.md) says
+  # `adapters/<host>.json` must match the .host field. A mislabeled
+  # file (codex.json with host=claude) used to pass and would also
+  # satisfy the README missing-file cross-check, so CI could ship a
+  # duplicated adapter while claiming the wrong host was verified.
+  # Codex flagged this on the PR 6 second review pass.
+  if [ "$host" != "$name" ]; then
+    errors="${errors:+$errors; }host=$host does not match filename basename=$name"
+  fi
+
+  # Empty-string capability values are NOT valid even though the key
+  # exists. Codex caught the empty-passes-through hole on the PR 6
+  # first review pass: a README-listed adapter with bash_guard=""
+  # used to come back OK.
+  for field in bash_guard write_guard phase_gate; do
+    # Indirect expansion reads the variable named by $field verbatim.
+    # `eval echo` would word-split and glob the value, and echo would
+    # swallow values such as "-n", so it is not used here.
+    val=${!field}
+    if [ -z "$val" ]; then
+      errors="${errors:+$errors; }$field is empty"
+    elif ! in_enum "$val" "$ENFORCEMENT_ENUM"; then
+      errors="${errors:+$errors; }$field=$val not in enum ($ENFORCEMENT_ENUM)"
+    fi
+  done
+
+  if [ -z "$discovery" ]; then
+    # Already reported as missing above when the key was absent. Only
+    # flag here if the key exists but the value is empty.
+    if jq -e 'has("skill_discovery")' "$file" >/dev/null 2>&1; then
+      errors="${errors:+$errors; }skill_discovery is empty"
+    fi
+  elif ! in_enum "$discovery" "$DISCOVERY_ENUM"; then
+    errors="${errors:+$errors; }skill_discovery=$discovery not in enum ($DISCOVERY_ENUM)"
+  fi
+
+  # age_days stays "unknown" unless last_verified parses to a
+  # non-future date.
+  local age_days="unknown"
+  if [ -n "$last_verified" ]; then
+    # Suppress set -e for the parse so we always reach record_result.
+    # Codex flagged the silent-exit on the PR 6 first review pass.
+    local then_epoch
+    set +e
+    then_epoch=$(parse_iso_date "$last_verified")
+    set -e
+    if [ -z "$then_epoch" ]; then
+      errors="${errors:+$errors; }last_verified=$last_verified does not parse as a date"
+    elif [ "$then_epoch" -gt "$NOW_EPOCH" ]; then
+      # A future date silently suppressed every freshness warning
+      # because (now - future) is negative. Codex caught this on the
+      # PR 6 fifth review pass: a typo like 2099-01-01 used to make
+      # an adapter look perpetually fresh.
+      errors="${errors:+$errors; }last_verified=$last_verified is in the future"
+    else
+      age_days=$(( (NOW_EPOCH - then_epoch) / 86400 ))
+    fi
+  else
+    errors="${errors:+$errors; }last_verified is empty"
+  fi
+
+  # Schema errors always fail. Otherwise staleness decides: past
+  # FAIL_DAYS is a fail for README-listed hosts (a warn when the
+  # override is set, or when the host is not README-listed); past
+  # WARN_DAYS is a warn.
+  local status="ok"
+  if [ -n "$errors" ]; then
+    status="fail"
+  elif [ "$age_days" != "unknown" ]; then
+    if [ "$age_days" -gt "$FAIL_DAYS" ]; then
+      case " $README_LISTED " in
+        *" $host "*)
+          if [ "${NANOSTACK_ALLOW_STALE_ADAPTERS:-0}" = "1" ]; then
+            status="warn"
+            errors="last_verified is $age_days days old (>$FAIL_DAYS); override active"
+          else
+            status="fail"
+            errors="last_verified is $age_days days old (>$FAIL_DAYS) and $host is README-listed"
+          fi
+          ;;
+        *)
+          status="warn"
+          errors="last_verified is $age_days days old (>$FAIL_DAYS) but $host is not README-listed"
+          ;;
+      esac
+    elif [ "$age_days" -gt "$WARN_DAYS" ]; then
+      status="warn"
+      errors="last_verified is $age_days days old (>$WARN_DAYS)"
+    fi
+  fi
+
+  # The fifth argument is the numeric age for JSON output; "unknown"
+  # maps to 0 so --argjson always receives a number.
+  record_result "$name" "$status" "$errors" "$age_days" "$([ "$age_days" = "unknown" ] && echo 0 || echo "$age_days")"
+}
+
+# record_result NAME STATUS MESSAGE AGE_DAYS AGE_INT
+# Bumps the FAIL/WARN counter for STATUS and appends one entry to both
+# RESULTS_JSON (an object per adapter) and RESULTS_TEXT (one line per
+# adapter). AGE_DAYS is the display value; AGE_INT is the numeric value
+# stored in the JSON.
+record_result() {
+  local name="$1" status="$2" message="$3" age_days="$4" age_int="$5"
+  case "$status" in
+    fail) FAIL=$((FAIL + 1)) ;;
+    warn) WARN=$((WARN + 1)) ;;
+  esac
+  RESULTS_JSON=$(printf '%s\n' "$RESULTS_JSON" | jq \
+    --arg name "$name" \
+    --arg status "$status" \
+    --arg message "$message" \
+    --argjson age "${age_int:-0}" \
+    '. + [{adapter: $name, status: $status, age_days: $age, message: ($message // "")}]')
+  if [ "$status" = "ok" ]; then
+    RESULTS_TEXT="${RESULTS_TEXT}OK $name (age $age_days days)
+"
+  else
+    local label
+    label=$(echo "$status" | tr '[:lower:]' '[:upper:]')
+    RESULTS_TEXT="${RESULTS_TEXT}${label} $name: $message
+"
+  fi
+}
+
+# Validate every adapter file; the -f test skips the unexpanded glob
+# when the directory holds no *.json.
+for f in "$ADAPTER_DIR"/*.json; do
+  [ -f "$f" ] || continue
+  check_adapter "$f"
+done
+
+# When the caller passed a filter and nothing matched, treat that as a
+# failure.
Otherwise a typo (`check-adapters.sh codxe`) produced an +# empty summary that exited 0, suggesting CI had validated the +# requested adapter when no file matched. Codex flagged this on the +# PR 6 fourth review pass. +if [ -n "$FILTER" ] && [ "$FILTER_MATCHED" = "0" ]; then + FAIL=$((FAIL + 1)) + RESULTS_TEXT="${RESULTS_TEXT}FAIL ${FILTER}: no adapters/${FILTER}.json found (filter matched nothing) +" + RESULTS_JSON=$(echo "$RESULTS_JSON" | jq \ + --arg name "$FILTER" \ + '. + [{adapter: $name, status: "fail", age_days: 0, message: "filter matched no adapter file"}]') +fi + +# Cross-check: every README-listed adapter must have a JSON file. +# In single-host mode (caller passed a $FILTER) we only check the +# requested adapter so a partial checkout that mentions other +# adapters in its README does not fail the targeted run. Codex +# flagged the cross-host bleed on the PR 6 eighth review pass. +for host in $README_LISTED; do + [ -z "$host" ] && continue + if [ -n "$FILTER" ] && [ "$host" != "$FILTER" ]; then + continue + fi + if [ ! -f "$ADAPTER_DIR/${host}.json" ]; then + FAIL=$((FAIL + 1)) + RESULTS_TEXT="${RESULTS_TEXT}FAIL ${host}: listed in README but no adapters/${host}.json +" + RESULTS_JSON=$(echo "$RESULTS_JSON" | jq \ + --arg name "$host" \ + '. + [{adapter: $name, status: "fail", age_days: 0, message: "listed in README but no JSON file"}]') + fi +done + +if $JSON_OUT; then + jq -n --argjson results "$RESULTS_JSON" --argjson fail "$FAIL" --argjson warn "$WARN" \ + '{adapters: $results, summary: {fail: $fail, warn: $warn}}' +else + printf '%s' "$RESULTS_TEXT" + echo "---" + echo "Summary: $FAIL failed, $WARN warned" +fi + +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/ci/e2e-adapter-freshness.sh b/ci/e2e-adapter-freshness.sh new file mode 100755 index 0000000..ed89502 --- /dev/null +++ b/ci/e2e-adapter-freshness.sh @@ -0,0 +1,486 @@ +#!/usr/bin/env bash +# e2e-adapter-freshness.sh — Adapter schema + freshness contract. 
+#
+# PR 6 of the 2026-05-10 architecture audit. Locks bin/check-adapters.sh
+# end-to-end against a tmp adapters/ directory so the live repo
+# adapters never need to be mutated to exercise the failure paths.
+#
+# Spec acceptance, verbatim:
+# "A malformed adapter JSON fails lint."
+# "A README-listed adapter missing from adapters/ fails lint."
+# "A stale adapter beyond threshold fails scheduled/manual verification."
+set -eu
+
+REPO="$(cd "$(dirname "$0")/.." && pwd)"
+TMP_ROOT=$(mktemp -d /tmp/nanostack-adapter-freshness.XXXXXX)
+# Every fixture lives under TMP_ROOT, so one trap cleans up all of them.
+trap 'rm -rf "$TMP_ROOT"' EXIT
+
+# Running tallies, updated only by assert_eq.
+PASS=0
+FAIL=0
+
+# ANSI colour escapes; they are expanded by printf's format string.
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+DIM='\033[0;90m'
+NC='\033[0m'
+
+# assert_eq LABEL EXPECTED ACTUAL
+# String-compares EXPECTED with ACTUAL, bumps PASS or FAIL, and prints
+# one coloured result line (plus both values on a mismatch). It never
+# aborts the run; the final summary decides the exit code.
+assert_eq() {
+  local label="$1" want="$2" got="$3"
+  if [ "$want" != "$got" ]; then
+    FAIL=$((FAIL + 1))
+    printf " ${RED}FAIL${NC} %s\n" "$label"
+    printf " ${DIM}expected: %s${NC}\n" "$want"
+    printf " ${DIM}actual: %s${NC}\n" "$got"
+    return
+  fi
+  PASS=$((PASS + 1))
+  printf " ${GREEN}OK${NC} %s\n" "$label"
+}
+
+# run_check_in ROOT [ARGS...]
+# Runs ROOT's copy of bin/check-adapters.sh from inside ROOT, so the
+# live adapters directory stays untouched. stderr is folded into
+# stdout and a trailing "RC=<exit status>" line is appended for the
+# caller to parse.
+run_check_in() {
+  local repo_root="$1"
+  shift
+  (
+    cd "$repo_root" && bash bin/check-adapters.sh "$@" 2>&1
+    echo "RC=$?"
+  )
+}
+
+# Build a tmp repo root that has bin/check-adapters.sh + adapters/
+# pointing at our test fixtures. The README in the tmp root mentions
+# the adapter set we want to lock against.
+new_repo() {
+  # Prints the path of a fresh fixture root named $1 under TMP_ROOT,
+  # holding an executable copy of the checker and an empty adapters/.
+  local repo_dir="$TMP_ROOT/$1"
+  mkdir -p "$repo_dir/bin" "$repo_dir/adapters"
+  cp "$REPO/bin/check-adapters.sh" "$repo_dir/bin/"
+  chmod +x "$repo_dir/bin/check-adapters.sh"
+  printf '%s\n' "$repo_dir"
+}
+
+# write_adapter ROOT NAME LAST_VERIFIED [JQ_FILTER]
+# Writes ROOT/adapters/NAME.json from a complete baseline adapter
+# document. JQ_FILTER (default "." = unchanged) is applied to the
+# baseline so a cell can delete or corrupt a single field.
+write_adapter() {
+  local repo_dir="$1" host_name="$2" verified_on="$3" mutate="${4:-.}"
+  local baseline
+  baseline=$(jq -n --arg host "$host_name" --arg lv "$verified_on" \
+    '{host: $host, schema_version: "1", last_verified: $lv,
+      verification: {method: "ci", evidence: "test"},
+      skill_discovery: "native",
+      bash_guard: "enforced",
+      write_guard: "enforced",
+      phase_gate: "enforced",
+      install_target: ".claude/settings.json",
+      doctor_checks: ["hooks"]
+    }')
+  printf '%s\n' "$baseline" | jq "$mutate" > "$repo_dir/adapters/${host_name}.json"
+}
+
+echo "Adapter Freshness E2E"
+echo "====================="
+echo "Tmp root: $TMP_ROOT"
+echo
+
+# Today's UTC date; used as last_verified wherever a fresh adapter is needed.
+NOW_ISO=$(date -u +%Y-%m-%d)
+
+# Cell 1: a fresh, complete adapter set passes.
+echo "[1] fresh adapter set passes"
+root=$(new_repo "cell1")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` and `codex` as verified adapters.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+write_adapter "$root" codex "$NOW_ISO"
+out=$(run_check_in "$root")
+# The exit status is the trailing RC= line appended by run_check_in.
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "fresh set exits 0" "0" "$rc"
+
+# Cell 2: a malformed adapter (missing required field) fails.
+echo "[2] missing required field fails"
+root=$(new_repo "cell2")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$NOW_ISO" 'del(.bash_guard)'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "missing bash_guard exits 1" "1" "$rc"
+if echo "$out" | grep -q "missing bash_guard"; then
+  seen="yes"
+else
+  seen="no"
+fi
+assert_eq "missing field reported" "yes" "$seen"
+
+# Cell 3: enum violation (skill_discovery value not in enum) fails.
+echo "[3] enum violation fails"
+root=$(new_repo "cell3")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$NOW_ISO" '.skill_discovery = "bogus"'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "enum violation exits 1" "1" "$rc"
+
+# Cell 4: README-listed adapter missing from adapters/ fails.
+echo "[4] README-listed adapter missing from adapters/ fails"
+root=$(new_repo "cell4")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` and `cursor` as verified adapters.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+# cursor.json deliberately not written
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "missing README-listed adapter exits 1" "1" "$rc"
+echo "$out" | grep -q "no adapters/cursor.json" && \
+  assert_eq "missing adapter reported" "yes" "yes" || \
+  assert_eq "missing adapter reported" "yes" "no"
+
+# Cell 5: stale adapter beyond fail threshold (60 days) on a
+# README-listed host fails.
+echo "[5] stale README-listed adapter fails after 60 days"
+root=$(new_repo "cell5")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+# 90 days ago. BSD date syntax (-v) is tried first, GNU date (--date)
+# is the fallback. Cells 6 and 7 reuse $stale_date.
+stale_date=$(date -u -v-90d +%Y-%m-%d 2>/dev/null || date -u --date='90 days ago' +%Y-%m-%d)
+write_adapter "$root" claude "$stale_date"
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "stale README-listed adapter exits 1" "1" "$rc"
+echo "$out" | grep -q "days old" && \
+  assert_eq "stale message reported" "yes" "yes" || \
+  assert_eq "stale message reported" "yes" "no"
+
+# Cell 6: a stale adapter that is NOT README-listed warns but does
+# not fail.
+echo "[6] stale non-README-listed adapter warns, does not fail"
+# Guard case first: `experimental` is not a known host, so the host
+# enum check is expected to reject it before freshness is considered.
+# An unknown host must not slip through on the warn path. This run
+# used to be executed and then discarded; the outcome is now asserted.
+root=$(new_repo "cell6")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+write_adapter "$root" experimental "$stale_date"
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "stale adapter with unknown host is rejected (rc 1)" "1" "$rc"
+# The warn path itself needs a known host that the README does not
+# list; cursor fits.
+root=$(new_repo "cell6b")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+write_adapter "$root" cursor "$stale_date"
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "stale non-listed adapter does NOT fail (rc 0)" "0" "$rc"
+echo "$out" | grep -q "WARN" && \
+  assert_eq "warn label present" "yes" "yes" || \
+  assert_eq "warn label present" "yes" "no"
+
+# Cell 7: NANOSTACK_ALLOW_STALE_ADAPTERS=1 downgrades the fail to a
+# warning so a maintainer can re-run on an old branch.
+echo "[7] NANOSTACK_ALLOW_STALE_ADAPTERS=1 downgrades fail to warn"
+root=$(new_repo "cell7")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$stale_date"
+# Invoked directly instead of through run_check_in so the override
+# variable is scoped to this one command.
+out=$(cd "$root" && NANOSTACK_ALLOW_STALE_ADAPTERS=1 bash bin/check-adapters.sh 2>&1; echo "RC=$?")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "override exits 0" "0" "$rc"
+echo "$out" | grep -q "override active" && \
+  assert_eq "override message present" "yes" "yes" || \
+  assert_eq "override message present" "yes" "no"
+
+# Cell 7a: documented capability values from
+# reference/host-adapter-schema.md (detectable, hooked, host_dependent)
+# must be accepted, not rejected. Codex flagged the enum drift on
+# the PR 6 first review pass.
+echo "[7a] documented capability enum is honored"
+root=$(new_repo "cell7a-enum")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+# Use the full enum across three different capabilities.
+write_adapter "$root" claude "$NOW_ISO" '
+  .bash_guard = "detectable"
+  | .write_guard = "hooked"
+  | .phase_gate = "host_dependent"
+'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "documented capability enum passes (rc 0)" "0" "$rc"
+
+# Cell 7b: a required capability set to the empty string must fail
+# even though the key is present. Codex flagged the empty-bypass
+# on the PR 6 first review pass.
+echo "[7b] empty capability value fails (does not silently pass)"
+root=$(new_repo "cell7b-empty")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+write_adapter "$root" claude "$NOW_ISO" '.bash_guard = ""'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "empty bash_guard fails (rc 1)" "1" "$rc"
+if echo "$out" | grep -q "bash_guard is empty"; then
+  seen="yes"
+else
+  seen="no"
+fi
+assert_eq "empty-field message present" "yes" "$seen"
+
+# Cell 7c: an adapter without its verification block fails the
+# schema check. Codex caught the truncated required-field list on
+# the PR 6 first review pass.
+echo "[7c] missing verification block fails"
+root=$(new_repo "cell7c-no-verification")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+write_adapter "$root" claude "$NOW_ISO" 'del(.verification)'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "missing verification exits 1" "1" "$rc"
+if echo "$out" | grep -q "missing verification"; then
+  seen="yes"
+else
+  seen="no"
+fi
+assert_eq "missing-verification message present" "yes" "$seen"
+
+# Cell 7d: unparseable last_verified surfaces a clear error and still
+# completes the run (does not silent-exit under set -e). Codex P3
+# from the PR 6 first review pass.
+echo "[7d] unparseable last_verified is reported (no silent exit)"
+root=$(new_repo "cell7d-bad-date")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` only.
+EOF
+# write_adapter stores its third argument verbatim as last_verified,
+# so any string can be injected here.
+write_adapter "$root" claude "not-a-date"
+out=$(run_check_in "$root")
+# The exit status is the trailing RC= line appended by run_check_in.
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "unparseable date exits 1" "1" "$rc"
+echo "$out" | grep -q "does not parse as a date" && \
+  assert_eq "unparseable-date message present" "yes" "yes" || \
+  assert_eq "unparseable-date message present" "yes" "no"
+
+# Cell 7e: host field must match the filename basename. A
+# mislabeled file (cursor.json with host=claude) used to pass and
+# would also satisfy the README missing-file check. Codex flagged
+# the duplicated-adapter hole on the PR 6 second review pass.
+echo "[7e] host field must match filename"
+root=$(new_repo "cell7e-mislabel")
+cat > "$root/README.md" <<'EOF'
+README mentions `cursor`.
+EOF
+# Filename is cursor.json but host is "claude" — a mislabel.
+write_adapter "$root" cursor "$NOW_ISO" '.host = "claude"'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "mislabeled host exits 1" "1" "$rc"
+echo "$out" | grep -q "does not match filename" && \
+  assert_eq "mislabel message present" "yes" "yes" || \
+  assert_eq "mislabel message present" "yes" "no"
+
+# Cell 7f: README path anchors at the repo root, not the caller's
+# cwd. A script invoked from outside the repo used to compute an
+# empty README_LISTED, which silently downgraded the fail-after-60
+# policy to a warn. Codex caught the cwd-dependent path on the PR 6
+# third review pass.
+echo "[7f] check-adapters.sh reads README from the repo root, not cwd"
+root=$(new_repo "cell7f-cwd")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+# 90 days ago: BSD date syntax (-v) first, GNU date (--date) as fallback.
+stale=$(date -u -v-90d +%Y-%m-%d 2>/dev/null || date -u --date='90 days ago' +%Y-%m-%d)
+write_adapter "$root" claude "$stale"
+# Run from a totally unrelated cwd; the README at $root must still
+# be the one consulted. The scratch cwd lives under TMP_ROOT so the
+# EXIT trap removes it with everything else.
+elsewhere=$(mktemp -d "$TMP_ROOT/elsewhere.XXXX")
+out=$(cd "$elsewhere" && bash "$root/bin/check-adapters.sh" 2>&1; echo "RC=$?")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "stale README-listed adapter still fails from a foreign cwd" "1" "$rc"
+# Exit code 1 alone would also be produced by any unrelated failure in
+# the foreign cwd. Pin the reason: the stale-and-README-listed message
+# only appears when the README at $root was actually read.
+echo "$out" | grep -q "is README-listed" && \
+  assert_eq "foreign-cwd failure is the README-listed stale failure" "yes" "yes" || \
+  assert_eq "foreign-cwd failure is the README-listed stale failure" "yes" "no"
+
+# Cell 7g: doctor_checks must be string[]. Non-string entries
+# (numbers, objects) would break downstream doctor/setup code that
+# uses each entry as a check name. Codex caught the missing
+# element check on the PR 6 third review pass.
+echo "[7g] doctor_checks rejects non-string entries"
+root=$(new_repo "cell7g-doctor-types")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$NOW_ISO" '.doctor_checks = [123]'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "non-string doctor_checks fails (rc 1)" "1" "$rc"
+echo "$out" | grep -q "must be a non-empty array of strings" && \
+  assert_eq "doctor_checks message present" "yes" "yes" || \
+  assert_eq "doctor_checks message present" "yes" "no"
+
+# Cell 7h: a filter that matches no adapter file is a failure, not a
+# silent empty pass. Codex flagged the typo-passes-silently hole on
+# the PR 6 fourth review pass.
+echo "[7h] filter with no match fails (does not silently pass)"
+root=$(new_repo "cell7h-typo-filter")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+# The positional argument is the adapter filter; `codxe` is a
+# deliberate typo that matches no file under adapters/.
+out=$(cd "$root" && bash bin/check-adapters.sh codxe 2>&1; echo "RC=$?")
+# The exit status is the trailing RC= line appended above.
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "filter 'codxe' (typo) exits 1" "1" "$rc"
+echo "$out" | grep -q "filter matched nothing" && \
+  assert_eq "filter-typo message present" "yes" "yes" || \
+  assert_eq "filter-typo message present" "yes" "no"
+
+# Cell 7i: schema_version must be in the supported set. An adapter
+# declaring schema_version=2 (forward-incompatible) used to pass.
+# Codex caught the missing version check on the PR 6 fourth review
+# pass.
+echo "[7i] schema_version is validated against the supported set"
+root=$(new_repo "cell7i-schema-version")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+# The baseline written by write_adapter declares schema_version "1";
+# the filter bumps it to "2".
+write_adapter "$root" claude "$NOW_ISO" '.schema_version = "2"'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "schema_version=2 exits 1" "1" "$rc"
+echo "$out" | grep -q "schema_version=2 not in supported set" && \
+  assert_eq "schema-version message present" "yes" "yes" || \
+  assert_eq "schema-version message present" "yes" "no"
+
+# Cell 7j: future last_verified must fail, not silently suppress
+# freshness warnings. A typo like 2099-01-01 used to make an
+# adapter look perpetually fresh; Codex caught the negative-age
+# bypass on the PR 6 fifth review pass.
+echo "[7j] future last_verified fails (does not bypass freshness)"
+root=$(new_repo "cell7j-future")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+# Hard-coded far-future date: this cell only holds while the run
+# date is earlier than 2099-01-01.
+write_adapter "$root" claude "2099-01-01"
+out=$(run_check_in "$root")
+# The exit status is the trailing RC= line appended by run_check_in.
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "future last_verified exits 1" "1" "$rc"
+echo "$out" | grep -q "is in the future" && \
+  assert_eq "future-date message present" "yes" "yes" || \
+  assert_eq "future-date message present" "yes" "no"
+
+# Cell 7k: a malformed verification block (string instead of object)
+# must be reported as a typed failure, not crash jq under set -e.
+# Codex caught the unguarded read on the PR 6 fifth review pass.
+echo "[7k] verification as a non-object reports a typed failure"
+root=$(new_repo "cell7k-verification-shape")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+# The baseline verification value is an object ({method, evidence});
+# the filter swaps it for a plain string.
+write_adapter "$root" claude "$NOW_ISO" '.verification = "should be an object"'
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "non-object verification exits 1" "1" "$rc"
+echo "$out" | grep -q "verification is not an object" && \
+  assert_eq "verification-shape message present" "yes" "yes" || \
+  assert_eq "verification-shape message present" "yes" "no"
+
+# Cell 7l: a JSON file whose root is not an object (e.g. an array)
+# is reported as a typed failure, not a crash. Codex caught the
+# silent crash on the PR 6 sixth review pass: `[]` used to pass
+# the `jq -e .` check and then break the next field read.
+echo "[7l] non-object JSON root reports a typed failure"
+root=$(new_repo "cell7l-array-root")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF +echo "[]" > "$root/adapters/claude.json" +out=$(cd "$root" && bash bin/check-adapters.sh --json 2>&1; echo "RC=$?") +rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1) +assert_eq "array root exits 1" "1" "$rc" +echo "$out" | grep -q "root is not a JSON object" && \ + assert_eq "non-object-root message present" "yes" "yes" || \ + assert_eq "non-object-root message present" "yes" "no" +# --json should still produce a parseable summary even on this kind +# of failure. +json_only=$(echo "$out" | sed '/^RC=/d') +echo "$json_only" | jq -e '.summary.fail >= 1' >/dev/null 2>&1 && \ + assert_eq "--json still parseable on root-type failure" "yes" "yes" || \ + assert_eq "--json still parseable on root-type failure" "yes" "no" + +# Cell 7m: wrong scalar type (install_target: 123) is reported. +# Codex caught the type hole on the PR 6 sixth review pass. +echo "[7m] wrong scalar type for required field is reported" +root=$(new_repo "cell7m-scalar-type") +cat > "$root/README.md" <<'EOF' +README mentions `claude`. +EOF +write_adapter "$root" claude "$NOW_ISO" '.install_target = 123' +out=$(run_check_in "$root") +rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1) +assert_eq "install_target as int exits 1" "1" "$rc" +echo "$out" | grep -q "install_target is not a string" && \ + assert_eq "scalar-type message present" "yes" "yes" || \ + assert_eq "scalar-type message present" "yes" "no" + +# Cell 7n: non-ISO last_verified values (e.g. "yesterday" or +# "04/25/2026") must be rejected. GNU `date -d` on Ubuntu accepts +# these forms, which would let a malformed value pass the freshness +# gate on CI. Codex caught the permissive parse on the PR 6 seventh +# review pass. +echo "[7n] non-ISO last_verified is rejected" +root=$(new_repo "cell7n-non-iso") +cat > "$root/README.md" <<'EOF' +README mentions `claude`. 
+EOF
+write_adapter "$root" claude "yesterday"
+out=$(run_check_in "$root")
+# The exit status is the trailing RC= line appended by run_check_in.
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "'yesterday' exits 1" "1" "$rc"
+echo "$out" | grep -q "does not parse as a date" && \
+  assert_eq "non-ISO message present" "yes" "yes" || \
+  assert_eq "non-ISO message present" "yes" "no"
+# Same fixture root: this overwrites claude.json with a second
+# non-ISO form. Only the exit code is asserted for this variant.
+write_adapter "$root" claude "04/25/2026"
+out=$(run_check_in "$root")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "'04/25/2026' exits 1" "1" "$rc"
+
+# Cell 7o: single-host mode scopes the README cross-check. A README
+# that mentions claude AND cursor must let `check-adapters.sh claude`
+# pass even if cursor.json is missing — the caller asked only for
+# claude. Codex flagged the cross-host bleed on the PR 6 eighth
+# review pass.
+echo "[7o] single-host filter scopes the README cross-check"
+root=$(new_repo "cell7o-filter-scope")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude` and `cursor`.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+# cursor.json deliberately missing.
+out=$(cd "$root" && bash bin/check-adapters.sh claude 2>&1; echo "RC=$?")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "filter=claude passes even though cursor.json is missing" "0" "$rc"
+# Without the filter, the same setup must fail.
+out=$(cd "$root" && bash bin/check-adapters.sh 2>&1; echo "RC=$?")
+rc=$(echo "$out" | sed -n 's/^RC=\(.*\)/\1/p' | tail -1)
+assert_eq "no filter still fails on missing cursor.json" "1" "$rc"
+
+# Cell 8: --json output emits a parseable summary object.
+echo "[8] --json output is parseable"
+root=$(new_repo "cell8")
+cat > "$root/README.md" <<'EOF'
+README mentions `claude`.
+EOF
+write_adapter "$root" claude "$NOW_ISO"
+# stderr is deliberately not captured, so $out holds only the JSON
+# document. `|| true` keeps `set -e` from aborting the whole harness
+# on a non-zero checker exit; without it the run would stop here and
+# never reach the assertion below or the final summary.
+out=$(cd "$root" && bash bin/check-adapters.sh --json) || true
+if echo "$out" | jq -e '.summary.fail == 0' >/dev/null 2>&1; then
+  parsed="yes"
+else
+  parsed="no"
+fi
+assert_eq "--json output parses with summary.fail = 0" "yes" "$parsed"
+
+cd "$TMP_ROOT"
+
+# Final tally: exit 0 only when every assertion passed.
+echo
+echo "====================="
+TOTAL=$((PASS + FAIL))
+if [ "$FAIL" -eq 0 ]; then
+  printf "${GREEN}Adapter Freshness E2E: %d checks passed, 0 failed${NC}\n" "$PASS"
+  exit 0
+else
+  printf "${RED}Adapter Freshness E2E: %d failed of %d total${NC}\n" "$FAIL" "$TOTAL"
+  exit 1
+fi
diff --git a/guard/SKILL.md b/guard/SKILL.md index d911d17..1f6f2a4 100644 --- a/guard/SKILL.md +++ b/guard/SKILL.md @@ -100,13 +100,21 @@ When the user says `/unfreeze` or `/guard unfreeze`: ## The Check Script -`guard/bin/check-dangerous.sh` uses a three-tier permission system inspired by [Claude Code auto mode](https://www.anthropic.com/engineering/claude-code-auto-mode): +`guard/bin/check-dangerous.sh` runs every Bash call through a layered check pipeline. The order is deliberate: block rules run first so commands whose binary is on the allowlist (`cat`, `find`, `head`, `tail`) still get matched against known-bad patterns (e.g. `cat .env`, `find . -delete`). -**Tier 1: Allowlist.** Commands like `git status`, `ls`, `cat`, `jq` skip all checks. Safe by definition. +**Block rules** (run first, no exceptions). Matched against the full command string. The current rule counts are loaded from `guard/rules.json`; this doc does not hand-maintain them. -**Tier 2: In-project.** Operations that only touch files inside the current git repo pass through. They're reviewable via version control. +**Allowlist.** Commands like `git status`, `ls`, `jq` short-circuit when no block rule matched. -**Tier 3: Pattern matching.** Everything else is checked against block and warn rules in `guard/rules.json`.
+**Phase-aware concurrency.** When a session is active and the current phase declares `concurrency: read` (built-in or custom), write commands are blocked with category `concurrency-safety`. The active phase's `SKILL.md` is resolved through `bin/lib/phases.sh` so custom phases get the same protection as built-in ones. + +**In-project fast-path.** Operations that only touch files inside the current git repo pass through. Reviewable via version control. Runs after the concurrency check so an in-project `touch ./foo` cannot bypass a read-phase block. + +**Sprint phase gate.** Blocks `git commit` / `git push` until the required-before-ship ancestors of the active `phase_graph` have completed. The built-in sprint defaults to review + security + qa; custom graphs gate on their own ancestor list. + +**Budget gate.** Blocks all commands when the configured budget is exceeded. + +**Warn rules.** Final pass: matched commands are allowed but flagged in the output so the user is reminded what they're doing. When a command is blocked, guard suggests a safer alternative instead of just failing: @@ -120,9 +128,9 @@ Safer alternative: git push --force-with-lease (safer, fails if remote changed) ### Configurable rules -Rules live in `guard/rules.json`. 28 block rules and 9 warn rules ship by default across 7 categories: mass-deletion, history-destruction, database-destruction, infra-destruction, production-access, remote-code-execution, security-degradation, safety-bypass. +Rules live in `guard/rules.json`. Each rule has an ID, regex pattern, category, description, and (for block rules) a safer alternative. The shipped categories include mass-deletion, history-destruction, database-destruction, infra-destruction, production-access, remote-code-execution, security-degradation, and safety-bypass. Run `jq '[.tiers.block.rules[].id] | length' guard/rules.json` (or the equivalent for warn rules) to inspect the live counts; the CI lint job derives them from this file. 
-Users can add custom rules by editing `guard/rules.json`. Each rule has an ID, regex pattern, category, description, and (for block rules) a safer alternative. +Users can add custom rules by editing `guard/rules.json`. ## Telemetry finalize diff --git a/guard/bin/check-dangerous.sh b/guard/bin/check-dangerous.sh index 1275d1b..b81f099 100755 --- a/guard/bin/check-dangerous.sh +++ b/guard/bin/check-dangerous.sh @@ -1,14 +1,24 @@ #!/usr/bin/env bash # Guard: check-dangerous.sh -# Three-tier permission check inspired by Claude Code auto mode. -# Tier 1: Allowlist (always safe, skip checks) -# Tier 2: In-project operations (safe, reviewable via git) -# Tier 3: Pattern matching against block/warn rules +# Layered permission check for every Bash call. Block rules run before +# the allowlist so allowlisted binaries (cat, find, head, tail) still +# match known-bad patterns like `cat .env` or `find . -delete`. +# +# Order: +# Block rules (no exceptions, fail closed) +# Allowlist (safe commands short-circuit) +# Phase-aware concurrency (read phases block write commands) +# In-project fast-path (git-reviewable changes pass) +# Sprint phase gate (blocks commit/push until required +# ancestors of ship are complete) +# Budget gate (blocks all commands when over budget) +# Warn rules (allowed but flagged) # # On block: suggests a safer alternative (deny-and-continue). # On warn: allows but flags the risk. # -# Called by Claude Code's PreToolUse hook on Bash commands. +# Called by the PreToolUse hook on Bash commands (Claude Code hosts the +# hook directly; other adapters install per their host docs). # Exit 0 = safe/warn, Exit 1 = blocked. set -euo pipefail diff --git a/llms.txt b/llms.txt index 8c4cb63..6fe9688 100644 --- a/llms.txt +++ b/llms.txt @@ -1,42 +1,54 @@ # Nanostack -Nanostack is a set of AI coding agent skills for the full engineering workflow. It works with Claude Code, Cursor, OpenAI Codex, OpenCode, Gemini CLI and any agent that reads SKILL.md files. 
+Nanostack is a local workflow framework for AI coding agents. It ships the default sprint (think -> plan -> build -> review -> security -> qa -> ship -> compound) plus a framework for declaring your own custom workflow stacks. Each phase is a skill that the agent reads from disk; downstream skills cross-reference each other through structured artifacts in `.nanostack/`. -## What it does +## Verified adapters -Nanostack gives an AI coding agent a structured sprint process: think, plan, build, review, test, secure, ship. Each skill acts as a specialist (CEO, engineer, QA lead, security auditor) that finds the right problem, scopes the solution, catches bugs and enforces quality before code reaches production. +These hosts have a tested adapter in `adapters/` with documented capability evidence. The `adapters/<host>.json` files are the single source of truth. -## Skills +- Claude Code +- Cursor +- OpenAI Codex +- OpenCode +- Gemini CLI -- /think: Strategic product thinking. Three intensity modes: Founder (full pushback for experienced entrepreneurs), Startup (challenges scope but respects pain points), Builder (minimal pushback, focus on simplest solution). Six forcing questions including manual delivery test and community validation. -- /nano: Implementation planning. Scope, steps, files, risks, architecture checkpoint, product standards (shadcn/ui, SEO, LLM SEO). -- /review: Two-pass code review. Structural correctness then adversarial edge-case hunting. Auto-fixes mechanical issues, asks about judgment calls. Detects scope drift against the plan artifact. -- /qa: Quality assurance. Browser, API, CLI and debug testing. Fixes bugs with atomic commits. WTF heuristic stops when further fixes would introduce regressions. -- /security: Security audit. Auto-detects stack, scans for secrets, injection, auth flaws, CI/CD misconfigs, AI/LLM vulnerabilities. Graded report (A-F). Cross-references /review findings for conflict detection. -- /ship: Ship to production.
Pre-flight checks, PR creation, CI monitoring, post-deploy verification, rollback plan. Generates sprint journal automatically. -- /guard: Three-tier safety. Tier 1: allowlist of safe commands. Tier 2: in-project operations pass (reviewable via git). Tier 3: pattern matching against 28 block rules and 9 warn rules. Blocked commands get a safer alternative. Rules configurable in guard/rules.json. -- /conductor: Multi-agent sprint orchestrator. Coordinates parallel sessions through claim/complete protocol with atomic file locking. +Other agents may read the SKILL.md files directly, but are not verified adapters unless they appear in this list. Capabilities (hook enforcement, write guard, phase gate) vary by host; the JSON files spell out what each adapter actually enforces. -## Know-how +## Default sprint -Skills automatically save structured artifacts to .nanostack/ after every run. Skills cross-reference each other: /review reads /nano for scope drift, /security reads /review for conflict detection. /ship generates a sprint journal from all phase artifacts. The know-how vault at .nanostack/know-how/ works as an Obsidian vault with linked journals, analytics dashboards and learnings. Bad sessions can be discarded with bin/discard-sprint.sh. +- /think: Strategic product thinking with calibrated intensity per archetype (founder validation, CLI tooling, API backend, landing experience). Saves a structured artifact with value proposition, scope mode, target user, narrowest wedge, key risk, and premise validation. +- /nano (alias /plan): Implementation plan with planned_files, plan approval, scope assessment, and product standards. +- build: The agent's own dev work. Not a saved phase; the artifact appears on review. +- /review: Two-pass code review (structural + adversarial). Detects scope drift against the plan and conflict-precedence against prior /security. +- /security: OWASP Top 10 + STRIDE audit, stack-aware. Cross-references /review for conflict resolution. 
+- /qa: Browser, API, CLI, or debug testing. WTF heuristic to stop when further fixes regress. +- /ship: Pre-flight, PR creation, CI monitoring, post-deploy verification. Generates the sprint journal. +- /compound: Reflection and learning capture after /ship. + +## Framework for custom workflow stacks + +Custom stacks declare their own phases in `.nanostack/config.json` (`custom_phases` + `phase_graph`) and live under `/skills//`. The conductor, guard, session, next-step, and resolver all consume the same phase registry, so a custom stack such as `build -> license-audit -> privacy-check -> release-readiness -> ship` gets the same lifecycle support as the built-in sprint: graph-aware progression, concurrency enforcement, artifact trust, schema validation, and routing intent through `phase_context`. + +See `reference/custom-stack-contract.md` for the contract, `examples/custom-stack-template/compliance-release/` for a worked example, and `EXTENDING.md` for the new-user walkthrough. + +## Guard + +`guard/` enforces block rules and warn rules on Bash and Write/Edit calls. Rules live in `guard/rules.json` and are loaded at runtime; counts are not hand-maintained in docs. Block rules run before the allowlist so commands whose binary is allowlisted (cat, find, head, tail) still hit pattern checks for known-bad arguments. Phase concurrency, the sprint phase gate, and the budget gate run inside the same check pipeline. + +## Artifacts and trust + +Every artifact written by `bin/save-artifact.sh` carries a SHA-256 integrity field. `bin/find-artifact.sh` has a `--require-integrity` flag for strict consumers, and `bin/resolve.sh` exposes per-upstream trust state in its `upstream_status` field (`verified`, `integrity_missing`, `integrity_mismatch`, `missing`, `not_applicable`). + +## Privacy + +There is no Nanostack cloud. Artifacts, journals, and analytics stay under `.nanostack/` on disk. 
Telemetry is opt-in and documented in `reference/telemetry.md`; by default Nanostack makes no remote calls. ## Install +``` git clone https://github.com/garagon/nanostack.git ~/.claude/skills/nanostack cd ~/.claude/skills/nanostack && ./setup - -## Key differentiators - -- Questions what you're building before you build it (not just a coding assistant) -- Calibrated intensity: full pushback for founders, respectful diagnostic for users with clear pain -- Structured sprint: think, plan, build, review, qa, security, ship -- Cross-skill coordination: scope drift detection, conflict resolution with 10 built-in precedents -- Know-how pipeline: artifacts auto-save, skills cross-reference, sprint journals generate on ship -- Three-tier guard with deny-and-continue (suggests safer alternatives) -- Product standards enforced at planning time (shadcn/ui, SEO, LLM SEO) -- Zero dependencies, zero build step, works with any AI coding agent -- Privacy: no telemetry, no remote calls, all data stays local +``` ## Links