From d3a2041413498e6955b051a75db20d32e4de4140 Mon Sep 17 00:00:00 2001
From: duriantaco
Date: Sun, 10 May 2026 12:04:45 +0800
Subject: [PATCH] ci: add release gates and advisory Skylos scan

---
 .github/workflows/ci.yml | 114 +++++++++++++++++++++++++++++++++++++++
 .gitignore               |   4 ++
 COMPARISON.md            |   7 +--
 README.md                |  59 ++++++++++++++++----
 4 files changed, 171 insertions(+), 13 deletions(-)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..42dfd64
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,114 @@
+name: CI
+
+on:  # runs for every pull request and for pushes to main
+  pull_request:
+  push:
+    branches:
+      - main
+
+permissions:  # least privilege: the workflow token only reads repository contents
+  contents: read
+
+concurrency:  # one active run per workflow and ref; only stale PR runs are superseded
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # pushes to main always finish their gates
+
+jobs:
+  go:  # formatting, module tidiness and unit tests
+    name: Go checks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod  # Go version is taken from go.mod
+          cache: true
+
+      - name: Check formatting  # fails when gofmt -l lists any file
+        run: |
+          files="$(gofmt -l .)"
+          if [ -n "$files" ]; then
+            echo "Go files need gofmt:"
+            echo "$files"
+            exit 1
+          fi
+
+      - name: Check module tidiness  # fails when go mod tidy changes go.mod or go.sum
+        run: |
+          go mod tidy
+          git diff --exit-code -- go.mod go.sum
+
+      - name: Run tests
+        run: go test ./...
+
+  vouchbench:  # acceptance benchmark driven by scripts/vouchbench.sh
+    name: VouchBench acceptance
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+
+      - name: Run VouchBench  # results are written under /tmp/vouchbench
+        run: scripts/vouchbench.sh --out /tmp/vouchbench
+
+      - name: Add benchmark summary  # appends the markdown report to the job summary when it exists
+        if: always()  # also runs when the benchmark step failed
+        run: |
+          if [ -f /tmp/vouchbench/vouchbench.latest.md ]; then
+            cat /tmp/vouchbench/vouchbench.latest.md >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Upload benchmark artifacts
+        if: always()  # keep whatever was produced, even after a failure
+        uses: actions/upload-artifact@v4
+        with:
+          name: vouchbench-results
+          path: /tmp/vouchbench
+          if-no-files-found: ignore  # no error when nothing was written
+
+  skylos:  # advisory scan: a failure here does not fail the workflow run
+    name: Skylos advisory
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"  # quoted so YAML keeps it a string
+
+      - name: Install Skylos
+        continue-on-error: true  # an install failure does not fail the job
+        run: python -m pip install --upgrade skylos
+
+      - name: Run advisory scan  # writes skylos.sarif.json in the workspace
+        continue-on-error: true  # scan failures are reported but do not fail the job
+        run: |
+          skylos . \
+            --all \
+            --severity high \
+            --github \
+            --summary \
+            --sarif skylos.sarif.json \
+            --no-upload \
+            --force \
+            --limit 50
+
+      - name: Upload Skylos SARIF artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: skylos-sarif
+          path: skylos.sarif.json
+          if-no-files-found: ignore  # no error when the scan produced no report
diff --git a/.gitignore b/.gitignore
index d4c59f2..1d36aa7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,10 @@ __pycache__/
 .mypy_cache/
 .ruff_cache/
 
+# Skylos local scanner cache
+.skylos/cache/
+skylos.sarif.json
+
 # Local env files
 .env
 .env.*
diff --git a/COMPARISON.md b/COMPARISON.md
index 1e81f82..22c9bd8 100644
--- a/COMPARISON.md
+++ b/COMPARISON.md
@@ -4,9 +4,10 @@ Vouch should be honest about where it fits. It is not a replacement for
 Sigstore, SLSA, in-toto, OPA, or Conftest.
The defensible position is that Vouch is an obligation-oriented control plane that can compose with those tools. -One-sentence version: Vouch turns human-owned product intent into obligation IDs -and evidence requirements, then uses supply-chain identity, policy engines, and -runner artifacts to decide whether an agent change can ship. +One-sentence version: Vouch is the layer between "tests passed" and "ship it" +for AI-written code; it maps the contract a change touched to the exact evidence +required to release it, then composes with supply-chain identity, policy engines, +and runner artifacts to decide whether the change can ship. ## What Existing Tools Cover diff --git a/README.md b/README.md index f40d69b..4a7c269 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,37 @@ # Vouch -Vouch is a contract-and-evidence release gate for AI-written code. +Vouch keeps risky AI-written changes from shipping just because the tests passed. -It does one narrow job: given human-owned intent for a part of a repo, Vouch compiles that intent into machine-checkable obligations, links runner-produced evidence to those obligations, and returns a deterministic release decision: `block`, `human_escalation`, `canary`, or `auto_merge`. +It lets a team say, in code, what must remain true for important parts of a +repo, then requires evidence for those obligations before an agent change can +merge or release. + +In one sentence: Vouch turns human-owned release intent into stable obligation +IDs, checks runner artifacts against those obligations, and returns a +deterministic decision: `block`, `human_escalation`, `canary`, or `auto_merge`. + +## The Short Version + +Use Vouch when "CI passed" is too weak for agent-authored changes. + +Example: an agent changes password reset. The unit tests pass. That still does +not prove the change preserved the intended behavior, avoided logging reset +tokens, exposed the right runtime signal, and kept a rollback path. + +With Vouch: + +1. 
A human-owned contract says what `auth.password_reset` is responsible for. +2. Vouch compiles that contract into stable behavior, security, test, runtime, + and rollback obligation IDs. +3. Existing runners produce JUnit, scanner, probe, metric, verifier, or rollback + artifacts. +4. Vouch links those artifacts to the obligation IDs and applies release policy. +5. CI gets an auditable decision instead of a vague "looks good." + +The value is not more AI review. The value is a deterministic release boundary: +for the contract this change touched, the required evidence exists or the change +does not ship. ## Contents @@ -25,21 +53,31 @@ It does one narrow job: given human-owned intent for a part of a repo, Vouch com ## The Problem -AI agents can produce code faster than humans can carefully review every line. CI can tell you whether commands passed, but it usually cannot answer the release question that matters for agent changes: +AI agents can produce code faster than humans can carefully review every line. +CI can tell you whether commands passed, but it usually cannot answer the +release question that matters for agent changes: > For the contracts this change touches, are the required behavior, security, test, runtime, and rollback obligations covered by valid evidence? -Vouch adds that missing control plane. Humans declare what must remain true, existing runners produce evidence, and Vouch checks whether the evidence is complete enough for the repo's release policy. +Vouch adds that missing control plane. Humans declare what must remain true, +existing runners produce evidence, and Vouch checks whether the evidence is +complete enough for the repo's release policy. ## Why Use It Use Vouch when you want: -- Agent changes tied to explicit, human-owned product and release contracts. -- Passing tests to be necessary but not sufficient for risky changes. -- Stable obligation IDs that connect intent, changed files, evidence artifacts, and release decisions. 
-- A CI-friendly gate that consumes existing JUnit, JSON, text, verifier, metric, and rollback artifacts instead of replacing your runner. -- Optional signed evidence checks that bind artifacts to approved runner identities. +- A hard rule that risky agent changes need evidence, not just passing tests or + another model's approval. +- Human-owned product and release contracts for repo areas such as auth, + payments, permissions, data deletion, migrations, public APIs, and production + rollout. +- Stable obligation IDs that connect intent, changed files, evidence artifacts, + policy, and release decisions. +- A CI-friendly gate that consumes existing JUnit, JSON, text, verifier, metric, + and rollback artifacts instead of replacing your runner. +- Optional signed evidence checks that bind artifacts to approved runner + identities. Do not use Vouch for every small change. For low-risk edits, normal CI and review may be enough. Vouch is useful when the cost of a bad agent change is high enough that "tests passed" and "another agent said it looks fine" are not strong enough release criteria. @@ -57,7 +95,8 @@ Vouch is different because it asks for explicit release evidence: | Can the result be audited later? | Usually a prose comment or chat transcript. | Manifest, evidence artifacts, coverage, policy rule, and decision. | | Can CI enforce it deterministically? | Not reliably without custom glue. | Yes, `vouch gate` exits non-zero on `block`. | -The benefit is not "more AI review." The benefit is a release boundary that says: for this kind of change, these obligations must have these evidence artifacts, or the change does not ship. +The benefit is a release boundary that says: for this kind of change, these +obligations must have these evidence artifacts, or the change does not ship. That is also why there are several moving parts: