diff --git a/CLAUDE.md b/CLAUDE.md index 4df3522..813fbf7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,7 +29,7 @@ Backend is Fastify + Mastra. Fastify serves the HTTP API (Clerk JWT auth on prot The schema inference pipeline: frontend calls `POST /infer-schema` → Fastify verifies the Clerk JWT → calls `inferSchema()` in `backend/src/pipeline/schema-inference.ts` → Claude Sonnet 4.6 via OpenRouter → returns a Zod-validated `DatasetSchema` → frontend maps it to editable columns in the wizard. -The populate pipeline: frontend calls `POST /populate` with `{ datasetId, datasetName, description, columns }` → Fastify verifies the Clerk JWT → triggers `populateWorkflow` which: (1) clears existing rows, (2) builds a prompt from the schema, (3) runs the populate agent (Claude Sonnet 4.6) which searches the web via TinyFish APIs, then inserts rows into Convex one by one. Rows appear in realtime on the frontend via Convex reactive queries. +The populate pipeline: frontend calls `POST /populate` with `{ datasetId, datasetName, description, columns }` → Fastify verifies the Clerk JWT → runs the self-healing populate service. The service builds or reuses a recipe, runs the Mastra populate runtime against TinyFish search/fetch, validates source-backed rows, repairs bad recipes, promotes the passing recipe, then atomically replaces the dataset rows in Convex. Rows appear in realtime on the frontend via Convex reactive queries. Convex functions use `ctx.auth.getUserIdentity()` to get the authenticated user. The `ownerId` field on datasets stores `identity.subject` (Clerk user ID). Do not pass `ownerId` from the client. @@ -49,4 +49,10 @@ Convex is self-hosted — it does NOT hot-reload when you edit files in `fronten In CI/prod, run `npx convex deploy` with `CONVEX_SELF_HOSTED_URL` and `CONVEX_SELF_HOSTED_ADMIN_KEY` set as env vars. +## Self-Healing Verification + +Run `make verify-self-healing` before handing the stack to another agent. 
It runs backend tests, backend build, adapter syntax checks, and a no-key benchmark smoke that should block cleanly without spending API credits. + +Use `bash scripts/verify-self-healing-stack.sh --real-benchmark` for the 2-prompt real Mastra benchmark, and `bash scripts/verify-self-healing-stack.sh --convex-push --dataset-id <dataset-id>` for a live app dataset dry-run. Export the required env vars before live modes; the verifier does not parse secret files itself. Add `--commit` only when you intentionally want to replace rows. + This is an open-source (AGPL) project. Do not commit secrets, API keys, or internal docs. diff --git a/benchmarks/dataset-agent/README.md b/benchmarks/dataset-agent/README.md index 016738d..57eded5 100644 --- a/benchmarks/dataset-agent/README.md +++ b/benchmarks/dataset-agent/README.md @@ -21,6 +21,32 @@ Real Mastra benchmark runs require `OPENROUTER_API_KEY` and `TINYFISH_API_KEY` loaded execution-only. If either is missing, the adapter returns a blocked benchmark result instead of touching app data. +## Verify Self-Healing Stack + +Use this before asking someone else to migrate a new collection agent into the +app path: + +```bash +make verify-self-healing +``` + +That command runs backend tests, backend build, adapter syntax checks, and a +no-key benchmark smoke that must produce a clean `blocked` result without +spending OpenRouter or TinyFish credits. + +Live checks are explicit: + +```bash +bash scripts/verify-self-healing-stack.sh --real-benchmark +bash scripts/verify-self-healing-stack.sh --convex-push --dataset-id <dataset-id> +bash scripts/verify-self-healing-stack.sh --convex-push --dataset-id <dataset-id> --commit +``` + +The live benchmark and dataset smoke expect required env vars to already be +exported in the shell. They print only missing key names and never print secret +values. The `--convex-push` mode still uses the existing `make convex-push` +target, which requires `frontend/.env.local`. 
+
 ## Benchmark Env
 
 For each prompt the runner sets:
diff --git a/makefiles/Makefile b/makefiles/Makefile
index 497efef..633df80 100644
--- a/makefiles/Makefile
+++ b/makefiles/Makefile
@@ -1,4 +1,4 @@
-.PHONY: all dev down clean convex-push convex-env
+.PHONY: all dev down clean convex-push convex-env verify-self-healing
 
 all: dev
 
@@ -33,6 +33,9 @@ convex-push:
 		--url http://127.0.0.1:3210 \
 		--admin-key "$$(grep CONVEX_SELF_HOSTED_ADMIN_KEY .env.local | cut -d= -f2-)"
 
+verify-self-healing:
+	bash scripts/verify-self-healing-stack.sh
+
 down:
 	docker compose -f docker-compose.dev.yml down
 
diff --git a/scripts/verify-self-healing-stack.sh b/scripts/verify-self-healing-stack.sh
new file mode 100755
index 0000000..58c4793
--- /dev/null
+++ b/scripts/verify-self-healing-stack.sh
@@ -0,0 +1,368 @@
+#!/usr/bin/env bash
+#
+# Verify the self-healing populate stack.
+#
+# The default run executes the local gates (backend tests, backend build,
+# adapter syntax check) and a no-key benchmark smoke that must come back
+# "blocked". Live modes (--real-benchmark, --dataset-id, --convex-push) are
+# opt-in and only check env vars that are already exported.
+#
+# Exit status:
+#   0  every step passed
+#   1  at least one step failed, or the command line was invalid
+#   2  nothing failed, but at least one step was blocked by a prerequisite
+#
+# errexit is deliberately left off: each step records PASS/FAIL/BLOCK and the
+# script keeps going, so a single run reports every problem.
+set -uo pipefail
+
+# Stop if the repo root cannot be resolved, so an empty ROOT_DIR never reaches
+# the cd below.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" || exit 1
+cd "$ROOT_DIR" || exit 1
+
+readonly LOCAL_CONVEX_URL="http://127.0.0.1:3210"
+readonly MASTRA_ADAPTER="benchmarks/dataset-agent/adapters/mastra-populate-adapter.mjs"
+readonly MASTRA_SYSTEM="mastra=node --import ./backend/node_modules/tsx/dist/esm/index.mjs ${MASTRA_ADAPTER}"
+
+DATASET_ID=""
+SHOULD_COMMIT_ROWS=0
+SHOULD_RUN_CONVEX_PUSH=0
+SHOULD_RUN_LOCAL_GATES=1
+SHOULD_RUN_BLOCKED_BENCHMARK_SMOKE=1
+SHOULD_RUN_REAL_BENCHMARK=0
+# Stays 1 unless --convex-push was requested and did not succeed.
+CONVEX_PUSH_OK=1
+EXIT_STATUS=0
+
+# Print the command-line help to stdout.
+usage() {
+  cat <<'USAGE'
+Usage:
+  bash scripts/verify-self-healing-stack.sh [options]
+
+Options:
+  --dataset-id <id>     Run a live self-healing populate smoke for one dataset.
+  --commit              Commit rows for --dataset-id instead of dry-run.
+  --convex-push         Deploy Convex functions before the live dataset smoke.
+  --real-benchmark      Run a 2-prompt real Mastra benchmark. May spend API credits.
+  --skip-local          Skip backend test/build/node-check gates.
+  --no-blocked-smoke    Skip the no-key benchmark blocked-contract smoke.
+  -h, --help            Show this help.
+
+Default behavior runs only local checks and a no-key benchmark smoke. It does
+not load secret files and does not spend OpenRouter or TinyFish credits. Live
+dataset and benchmark modes require needed env vars to be exported already.
+USAGE
+}
+
+# Report a passed step; $1 is the step label.
+mark_pass() {
+  printf 'PASS %s\n' "$1"
+}
+
+# Report a failed step and force the final exit status to 1.
+mark_fail() {
+  printf 'FAIL %s\n' "$1"
+  EXIT_STATUS=1
+}
+
+# Report a blocked step. Raises the exit status to 2 only when it is still 0,
+# so an earlier failure (1) is never downgraded.
+mark_blocked() {
+  printf 'BLOCK %s\n' "$1"
+  if [[ "$EXIT_STATUS" -eq 0 ]]; then
+    EXIT_STATUS=2
+  fi
+}
+
+# Run "$@" (after the label in $1) and record PASS or FAIL.
+# Returns 0 when the command succeeded and 1 otherwise, so a caller can gate
+# follow-up steps on the outcome.
+run_required_step() {
+  local label="$1"
+  shift
+
+  printf 'RUN %s\n' "$label"
+  if "$@"; then
+    mark_pass "$label"
+    return 0
+  fi
+  mark_fail "$label"
+  return 1
+}
+
+# Succeed when command $1 is available; otherwise record a BLOCK, return 1.
+require_command() {
+  local command_name="$1"
+  if command -v "$command_name" >/dev/null 2>&1; then
+    return 0
+  fi
+  mark_blocked "missing command: ${command_name}"
+  return 1
+}
+
+# Succeed when the env var named $1 is set and non-empty; otherwise record a
+# BLOCK that prints only the variable name and return 1.
+require_env_var() {
+  local env_name="$1"
+  if [[ -n "${!env_name:-}" ]]; then
+    return 0
+  fi
+  mark_blocked "missing env: ${env_name}"
+  return 1
+}
+
+# Check every env var named in "$@" and report all missing names in one pass
+# rather than stopping at the first. Returns 1 if any was missing.
+require_env_vars() {
+  local env_name
+  local missing=0
+
+  for env_name in "$@"; do
+    require_env_var "$env_name" || missing=1
+  done
+  return "$missing"
+}
+
+# Succeed when docker is available and `docker compose ps` works against the
+# dev compose file.
+check_docker_compose_ready() {
+  require_command docker || return 1
+  docker compose -f docker-compose.dev.yml ps >/dev/null 2>&1
+}
+
+# Succeed when curl can fetch <$1>/version (trailing slash on $1 is ignored).
+check_convex_ready() {
+  local convex_url="$1"
+  require_command curl || return 1
+  curl -sf "${convex_url%/}/version" >/dev/null 2>&1
+}
+
+# Validate the benchmark summary JSON at $1: the first aggregate group must
+# hold exactly one result that is blocked and not failed, and every spend/call
+# counter must be zero in the aggregate and in each lane result.
+assert_blocked_summary() {
+  node -e '
+const fs = require("fs");
+const summary = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
+const group = summary.aggregate?.[0];
+if (!group || group.total !== 1 || group.blocked !== 1 || group.failed !== 0) {
+  console.error("expected exactly one blocked benchmark result");
+  process.exit(1);
+}
+const aggregateSpendFields = [
+  "totalRows",
+  "totalPromptTokens",
+  "totalCompletionTokens",
+  "totalTokens",
+  "searchCallCount",
+  "fetchCallCount",
+  "browserCallCount",
+  "agentRunCount",
+  "agentStepCount",
+  "estimatedTotalCostUsd",
+];
+const nonZeroAggregateFields = aggregateSpendFields.filter(
+  (field) => Number(group[field] ?? 0) !== 0
+);
+if (nonZeroAggregateFields.length > 0) {
+  console.error(`expected zero spend/calls for blocked smoke: ${nonZeroAggregateFields.join(", ")}`);
+  process.exit(1);
+}
+for (const result of summary.laneResults ?? []) {
+  const laneSpendFields = [
+    ["rowCount", result.rowCount],
+    ["promptTokens", result.usage?.promptTokens],
+    ["completionTokens", result.usage?.completionTokens],
+    ["totalTokens", result.usage?.totalTokens],
+    ["searchCallCount", result.searchCallCount],
+    ["fetchCallCount", result.fetchCallCount],
+    ["browserCallCount", result.browserCallCount],
+    ["agentRunCount", result.agentRunCount],
+    ["agentStepCount", result.agentStepCount],
+    ["estimatedTotalCostUsd", result.estimatedTotalCostUsd],
+  ];
+  const nonZeroLaneFields = laneSpendFields
+    .filter(([, value]) => Number(value ?? 0) !== 0)
+    .map(([field]) => field);
+  if (nonZeroLaneFields.length > 0) {
+    console.error(`expected zero spend/calls for blocked lane: ${nonZeroLaneFields.join(", ")}`);
+    process.exit(1);
+  }
+}
+' "$1"
+}
+
+# Run one benchmark prompt with both API keys removed from the environment and
+# require a clean blocked result with zero spend.
+run_blocked_benchmark_smoke() {
+  local label="mastra benchmark no-key blocked smoke"
+  local out_dir
+  out_dir="benchmark-results/self-healing-blocked-smoke-$(date +%Y%m%d-%H%M%S)"
+
+  printf 'RUN %s\n' "$label"
+  if ! mkdir -p "$out_dir"; then
+    mark_fail "$label"
+    return
+  fi
+
+  # env -u drops the keys for this one child process only.
+  if ! env -u OPENROUTER_API_KEY -u TINYFISH_API_KEY \
+    node benchmarks/dataset-agent/run-benchmark.mjs \
+    --prompt-ids latest-ai-blog-posts \
+    --timeout-ms 60000 \
+    --out "$out_dir" \
+    --system "$MASTRA_SYSTEM" \
+    >"${out_dir}/runner-stdout.json"; then
+    mark_fail "$label"
+    return
+  fi
+
+  if assert_blocked_summary "${out_dir}/summary.json"; then
+    mark_pass "${label} (${out_dir})"
+  else
+    mark_fail "$label"
+  fi
+}
+
+# Run the 2-prompt real benchmark. BLOCKs (without running) when either API
+# key is missing from the environment.
+run_real_benchmark() {
+  require_env_vars OPENROUTER_API_KEY TINYFISH_API_KEY || return
+
+  local label="mastra real benchmark smoke"
+  local out_dir
+  out_dir="benchmark-results/self-healing-real-smoke-$(date +%Y%m%d-%H%M%S)"
+
+  printf 'RUN %s\n' "$label"
+  if ! mkdir -p "$out_dir"; then
+    mark_fail "$label"
+    return
+  fi
+
+  if node benchmarks/dataset-agent/run-benchmark.mjs \
+    --prompt-ids latest-ai-blog-posts,saas-pricing-pages \
+    --timeout-ms 900000 \
+    --out "$out_dir" \
+    --system "$MASTRA_SYSTEM" \
+    >"${out_dir}/runner-stdout.json"; then
+    mark_pass "${label} (${out_dir})"
+  else
+    mark_fail "$label"
+  fi
+}
+
+# Run the self-healing populate for $DATASET_ID, as a dry-run unless --commit
+# was given. BLOCKs when a required env var is missing or Convex is unreachable.
+run_live_dataset_smoke() {
+  require_env_vars CONVEX_URL CONVEX_SELF_HOSTED_ADMIN_KEY \
+    OPENROUTER_API_KEY TINYFISH_API_KEY || return
+
+  if ! check_convex_ready "$CONVEX_URL"; then
+    mark_blocked "Convex is not reachable at ${CONVEX_URL%/}/version"
+    return
+  fi
+
+  local populate_args=(--dataset-id "$DATASET_ID" --max-rows 3)
+  local label="self-healing dataset smoke dry-run"
+  if [[ "$SHOULD_COMMIT_ROWS" -eq 1 ]]; then
+    populate_args+=(--commit)
+    label="self-healing dataset smoke commit"
+  fi
+
+  run_required_step "$label" \
+    npm --silent --prefix backend run populate:self-heal -- "${populate_args[@]}"
+}
+
+while [[ "$#" -gt 0 ]]; do
+  case "$1" in
+    --dataset-id)
+      DATASET_ID="${2:-}"
+      # Reject a missing value and a following flag swallowed as the value.
+      if [[ -z "$DATASET_ID" || "$DATASET_ID" == -* ]]; then
+        printf 'Error: --dataset-id requires a value.\n' >&2
+        exit 1
+      fi
+      shift 2
+      ;;
+    --commit)
+      SHOULD_COMMIT_ROWS=1
+      shift
+      ;;
+    --convex-push)
+      SHOULD_RUN_CONVEX_PUSH=1
+      shift
+      ;;
+    --real-benchmark)
+      SHOULD_RUN_REAL_BENCHMARK=1
+      shift
+      ;;
+    --skip-local)
+      SHOULD_RUN_LOCAL_GATES=0
+      shift
+      ;;
+    --no-blocked-smoke)
+      SHOULD_RUN_BLOCKED_BENCHMARK_SMOKE=0
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      printf 'Error: unknown option: %s\n' "$1" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [[ "$SHOULD_COMMIT_ROWS" -eq 1 && -z "$DATASET_ID" ]]; then
+  printf 'Error: --commit requires --dataset-id.\n' >&2
+  exit 1
+fi
+
+if [[ "$SHOULD_RUN_LOCAL_GATES" -eq 1 ]]; then
+  run_required_step "backend tests" npm --prefix backend test
+  run_required_step "backend build" npm --prefix backend run build
+  run_required_step "mastra adapter syntax" node --check "$MASTRA_ADAPTER"
+fi
+
+if [[ "$SHOULD_RUN_BLOCKED_BENCHMARK_SMOKE" -eq 1 ]]; then
+  run_blocked_benchmark_smoke
+fi
+
+if [[ "$SHOULD_RUN_CONVEX_PUSH" -eq 1 ]]; then
+  CONVEX_PUSH_OK=0
+  if [[ ! -f frontend/.env.local ]]; then
+    mark_blocked "frontend/.env.local missing; cannot run make convex-push"
+  elif ! check_docker_compose_ready; then
+    mark_blocked "Docker Compose is not ready; cannot run make convex-push"
+  elif ! check_convex_ready "$LOCAL_CONVEX_URL"; then
+    mark_blocked "Convex is not reachable at ${LOCAL_CONVEX_URL}/version"
+  elif run_required_step "convex push" make convex-push; then
+    CONVEX_PUSH_OK=1
+  fi
+fi
+
+if [[ "$SHOULD_RUN_REAL_BENCHMARK" -eq 1 ]]; then
+  run_real_benchmark
+fi
+
+if [[ -n "$DATASET_ID" ]]; then
+  # A requested deploy that was blocked or failed must stop the live smoke.
+  if [[ "$CONVEX_PUSH_OK" -eq 1 ]]; then
+    run_live_dataset_smoke
+  else
+    mark_blocked "convex push did not succeed; skipping live dataset smoke"
+  fi
+fi
+
+case "$EXIT_STATUS" in
+  0) printf 'DONE self-healing stack verification passed\n' ;;
+  1) printf 'DONE self-healing stack verification failed\n' ;;
+  2) printf 'DONE self-healing stack verification blocked by local prerequisites\n' ;;
+esac
+
+exit "$EXIT_STATUS"