Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c00eef8
Persist self-healing process traces
giaphutran12 May 22, 2026
8bafe26
Gate Playwright candidate readiness
giaphutran12 May 22, 2026
08bce46
Ingest collection browser action traces
giaphutran12 May 22, 2026
05f2e9b
Preserve Agent browser actions in reports
giaphutran12 May 22, 2026
c9f8438
Expose self-healing benchmark diagnostics
giaphutran12 May 22, 2026
f5a6e77
Gate benchmark runs on Playwright readiness
giaphutran12 May 23, 2026
25be451
Surface Agent run provenance diagnostics
giaphutran12 May 23, 2026
43cb7a3
Gate rejected self-healing benchmark candidates
giaphutran12 May 23, 2026
d2b4a75
Refresh current self-healing stack plan
giaphutran12 May 23, 2026
c9097a8
Cap self-healing row commits
giaphutran12 May 23, 2026
b92345f
Merge main into self-healing stack rollup
giaphutran12 May 23, 2026
d818ba3
Ask Agent to emit browser actions
giaphutran12 May 23, 2026
edf1402
Emit Playwright candidate scripts
giaphutran12 May 23, 2026
70af5ec
Document meeting notes 6 agent map
giaphutran12 May 23, 2026
4fcd69b
Clarify self-healing docs and fix frontend verification
giaphutran12 May 23, 2026
1828f4d
Keep self-healing PR public safe and configurable
giaphutran12 May 23, 2026
37af0f4
Clarify Mastra as the app demo path
giaphutran12 May 23, 2026
f06258a
Clarify dev env setup failures
giaphutran12 May 23, 2026
cbca9a4
Merge PR 52 base into self-healing PR
giaphutran12 May 23, 2026
2bb743d
Connect self-healing populate to app path
giaphutran12 May 23, 2026
3cef4f9
Fix self-healing populate trust gaps
giaphutran12 May 23, 2026
67d70eb
Advance self-healing stack and root env setup
giaphutran12 May 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
# These are read by docker-compose.dev.yml.
# This is the only local env file BigSet expects.
# Copy this file to .env and fill in your values.

# Local service URLs
CLIENT_ORIGIN=http://localhost:3500
CONVEX_URL=http://localhost:3210
NEXT_PUBLIC_CONVEX_URL=http://127.0.0.1:3210
CONVEX_SELF_HOSTED_URL=http://127.0.0.1:3210
NEXT_PUBLIC_BACKEND_URL=http://localhost:3501
PORT=3501

# Clerk — create a free app at https://dashboard.clerk.com
# Enable the Clerk JWT Templates -> Convex template, then set your issuer URL.
NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY=pk_test_...
CLERK_SECRET_KEY=sk_test_...
CLERK_JWT_ISSUER_DOMAIN=https://your-app.clerk.accounts.dev

# OpenRouter — required by backend + Mastra for AI model calls.
# Generate at https://openrouter.ai/settings/keys
OPENROUTER_API_KEY=sk-or-...

# Optional model overrides.
# Schema inference defaults to anthropic/claude-sonnet-4-6.
# Populate and other non-inference tasks default to google/gemini-3.1-flash-lite.
# OPENROUTER_MODEL=google/gemini-3.1-flash-lite
# OPENROUTER_POPULATE_MODEL=google/gemini-3.1-flash-lite

# TinyFish — required by populate agent web search/fetch.
# Generate at https://agent.tinyfish.ai/api-keys
TINYFISH_API_KEY=
Expand All @@ -22,6 +38,31 @@ CONVEX_SELF_HOSTED_ADMIN_KEY=
# Docker dev overrides this to /app/.bigset/populate-recipes on a named volume.
POPULATE_RECIPE_STORE_DIR=.bigset/populate-recipes

# Populate runtime limits.
# POPULATE_MAX_ROWS=100
# POPULATE_MAX_SEARCH_CALLS=25
# POPULATE_MAX_FETCH_CALLS=50
# POPULATE_COMMIT_ROW_LIMIT_PER_HOUR=1000

# Browser-action self-healing. Non-secret tunables.
POPULATE_ENABLE_BROWSER_ACTION_BOX=true
POPULATE_BROWSER_ACTION_BOX_POLL_INTERVAL_MS=3000
POPULATE_ENABLE_PLAYWRIGHT_REPLAY=true
POPULATE_ENABLE_PLAYWRIGHT_REPAIR=true
POPULATE_PLAYWRIGHT_HEADLESS=true
# POPULATE_PLAYWRIGHT_EXECUTABLE_PATH=

# Collection-agent canaries. Leave Mastra as the default app runtime unless
# intentionally benchmarking the collection runner.
# POPULATE_AGENT_RUNTIME=collection
# POPULATE_COLLECTION_RUNNER_MODULE=./backend/src/pipeline/collection-agent-runner.ts
COLLECTION_AGENT_PIPELINE_MODULE=./backend/BigSet_Data_Collection_Agent/src/orchestrator/pipeline.ts
COLLECTION_AGENT_ENABLE_TRIAGE=true
COLLECTION_AGENT_ENABLE_AGENT=false
COLLECTION_AGENT_POLL_TIMEOUT_MS=1200000
AGENT_POLL_TIMEOUT_MS=1200000
AGENT_REQUEST_TIMEOUT_MS=15000

# PostHog (optional — leave blank to disable analytics entirely in local dev).
# Get from https://us.posthog.com/project/settings/general.
NEXT_PUBLIC_POSTHOG_KEY=
Expand Down
12 changes: 6 additions & 6 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ Frontend on :3500, backend on :3501, Mastra Studio on :4111, Convex dashboard on

1. Create a free Clerk account at https://clerk.com and create an application.
2. In the Clerk dashboard, go to **JWT Templates** and enable the **Convex** template.
3. Copy `frontend/.env.example` to `frontend/.env.local` and fill in your Clerk keys:
3. Copy `.env.example` to `.env` and fill in your Clerk keys:
- `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY` — from Clerk API Keys
- `CLERK_SECRET_KEY` — from Clerk API Keys
- `CLERK_JWT_ISSUER_DOMAIN` — your Frontend API URL (e.g. `https://your-app.clerk.accounts.dev`)
4. Add an OpenRouter API key to the root `.env` file: `OPENROUTER_API_KEY=sk-or-...` (get one at https://openrouter.ai/settings/keys). Docker Compose reads the root `.env` and passes it to the backend and Mastra containers.
4b. Add a TinyFish API key to the root `.env` file: `TINYFISH_API_KEY=...` (get one at https://agent.tinyfish.ai/api-keys). This enables the populate agent to search the web and fetch page content.
4. Add an OpenRouter API key to `.env`: `OPENROUTER_API_KEY=sk-or-...` (get one at https://openrouter.ai/settings/keys). Docker Compose reads root `.env` and passes it to frontend, backend, and Mastra containers.
4b. Add a TinyFish API key to `.env`: `TINYFISH_API_KEY=...` (get one at https://agent.tinyfish.ai/api-keys). This enables the populate agent to search the web and fetch page content.
5. Run `make dev` — this starts all Docker services AND pushes Convex functions automatically.
6. Generate a Convex admin key (first run only): `docker compose exec convex ./generate_admin_key.sh` and add it as `CONVEX_SELF_HOSTED_ADMIN_KEY` in `frontend/.env.local`, then re-run `make dev`.
6. Generate a Convex admin key (first run only): `docker compose exec convex ./generate_admin_key.sh` and add it as `CONVEX_SELF_HOSTED_ADMIN_KEY` in `.env`, then re-run `make dev`.

## Architecture

Expand All @@ -35,13 +35,13 @@ Convex functions use `ctx.auth.getUserIdentity()` to get the authenticated user.

## Environment Variables

Docker Compose interpolates variables from the root `.env` file. Key variables:
Root `.env` is the only local env file. Docker Compose, package scripts, Convex helper targets, and benchmarks load it. Key variables:
- `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY`, `CLERK_SECRET_KEY` — shared by frontend and backend
- `OPENROUTER_API_KEY` — used by backend and Mastra for AI model calls
- `CONVEX_SELF_HOSTED_ADMIN_KEY` — used by backend for system-level Convex writes
- `TINYFISH_API_KEY` — used by the populate agent for web search and fetch (get one at https://agent.tinyfish.ai/api-keys)

The backend container maps `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY` to `CLERK_PUBLISHABLE_KEY` (see `docker-compose.dev.yml`).
The backend accepts `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY` as the publishable Clerk key, and the Docker backend container also maps it to `CLERK_PUBLISHABLE_KEY` (see `docker-compose.dev.yml`).

## Convex Deploys

Expand Down
34 changes: 19 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,12 @@ cd bigset

Create a Clerk application at [dashboard.clerk.com](https://dashboard.clerk.com), then go to **JWT Templates** and enable the **Convex** template.

### 2. Configure env files
### 2. Configure env

```bash
# Root .env — used by Docker for the frontend container
cp .env.example .env
# Fill in NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY and CLERK_SECRET_KEY

# Frontend .env.local — used by Next.js and Convex CLI
cp frontend/.env.example frontend/.env.local
# Fill in all three Clerk keys (publishable, secret, and JWT issuer domain)
# Fill in NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY, CLERK_SECRET_KEY,
# CLERK_JWT_ISSUER_DOMAIN, OPENROUTER_API_KEY, and TINYFISH_API_KEY
```

> **Required for the create-dataset wizard:** set `OPENROUTER_API_KEY` (used by the schema-inference pipeline). Get one at [openrouter.ai](https://openrouter.ai). Without it the wizard's "Generate Schema" step will fail.
Expand All @@ -66,7 +62,11 @@ cp frontend/.env.example frontend/.env.local
make dev
```

This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically. Once it's up:
This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically.
`make dev` checks that root `.env` contains real Clerk/OpenRouter/TinyFish
values before it starts Docker. If it reports a placeholder key, replace that
value first.
Once it is up:

- App: http://localhost:3500
- Convex dashboard: http://localhost:6791
Expand All @@ -78,26 +78,31 @@ This starts all Docker services, waits for Convex to be healthy, and deploys Con
docker compose exec convex ./generate_admin_key.sh
```

Paste the output into `frontend/.env.local` as `CONVEX_SELF_HOSTED_ADMIN_KEY`, then re-run `make dev`.
Paste the output into `.env` as `CONVEX_SELF_HOSTED_ADMIN_KEY`, then re-run
`make dev`.

If `make dev` stops at `CONVEX_SELF_HOSTED_ADMIN_KEY is missing`, Docker and
Convex are already running far enough for you to run the command above. Generate
the key, paste it into root `.env`, and run `make dev` again.

### 5. Load curated public datasets

The landing page and the dashboard's "Curated" section read from a set of 9 system-owned datasets. Load them with:

```bash
cd frontend
npx convex run publicSeed:seedPublicDatasets
make seed-public-datasets
```

The script is **idempotent** — rerunning it skips datasets that already exist (matched by a stable `seedKey`, so renaming a curated dataset never creates a duplicate). To add a 10th curated dataset, append it to `PUBLIC_DATASETS` in [frontend/convex/publicSeed.ts](frontend/convex/publicSeed.ts) with a fresh `seedKey` and rerun the command. To replace existing curated content in place, pass `force: true`:

```bash
npx convex run publicSeed:seedPublicDatasets '{"force":true}'
cd frontend
node ../scripts/with-root-env.mjs npx convex run publicSeed:seedPublicDatasets '{"force":true}'
```

Open [localhost:3500](http://localhost:3500) and click **Get started** to sign in.

> **Note:** Backend env needs no setup — `backend/.env.example` has correct defaults. If you edit Convex functions in `frontend/convex/`, run `make convex-push` to deploy the changes.
> **Note:** root `.env` is the only local env file. If you edit Convex functions in `frontend/convex/`, run `make convex-push` to deploy the changes.

> **Free tier:** each signed-in account gets **2,500 row operations per calendar month** (resets on the 1st, UTC). The header shows a live usage badge; system-owned curated datasets bypass the quota.

Expand All @@ -123,12 +128,11 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i
bigset/
├── frontend/ Next.js 16 — UI + Convex schema & functions
│ ├── convex/ Convex functions, schema, authz + quota helpers
│ └── .env.local Clerk + Convex keys (not committed)
├── backend/ Fastify + Mastra — schema inference + (future) agents
│ ├── src/pipeline/ Pure schema-inference fn (called by Fastify + Mastra)
│ └── src/mastra/ Mastra workflows (Studio at :4111 in dev)
├── scripts/ One-off scripts (e.g. verify-authz.sh)
├── .env Clerk keys for docker-compose (not committed)
├── .env Local env for frontend, backend, Convex CLI, benchmarks (not committed)
├── docker-compose.dev.yml
└── Makefile
```
Expand Down
21 changes: 0 additions & 21 deletions backend/.env.example

This file was deleted.

75 changes: 48 additions & 27 deletions backend/BigSet_Data_Collection_Agent/src/agents/agent-goal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ import {
} from "../memory/index.js";
import { agentGoalSchema, type AgentGoal } from "../models/schemas.js";
import type { DatasetSpec, SourceTriageResult } from "../models/schemas.js";
import type { LlmMessage } from "../integrations/openrouter.js";

/**
 * Plain-text contract describing how the browsing agent must report the
 * browser steps it performed.
 *
 * The text asks for an ordered `agent_browser_actions` array next to `records`
 * in the result JSON, lists the per-action fields, and forbids putting secrets
 * in `value_description`. `buildAgentGoalMessages` embeds this string in the
 * user message payload under `browser_action_reporting_contract`.
 *
 * This is prompt text consumed by a model at runtime: any wording change
 * changes model-facing behavior, so edit it deliberately.
 */
export const AGENT_BROWSER_ACTION_CONTRACT = `Browser action reporting contract:
- The Tinyfish Agent result JSON MUST include "agent_browser_actions" next to "records".
- "agent_browser_actions" is an ordered array of browser steps the agent actually performed.
- Each action should use this shape when known: { "action": "navigate|click|type|select|wait|extract|screenshot|unknown", "url": "current page URL", "selector": "CSS selector when known", "target_text": "visible button/link/field text when known", "value_description": "safe description of typed/selected value, never secrets", "status": "succeeded|failed", "error": "failure reason if any", "phase": "initial|search|filter|pagination|detail|form|extract", "label": "short human label" }.
- Record navigation, clicks, form fills, pagination, waits that affected extraction, and final extraction.
- If a selector is unknown, still include url plus target_text when visible. If no browser action happened, return an empty array.
- Do not include raw passwords, tokens, cookies, or private user-entered values in value_description.`;

const AGENT_GOAL_SYSTEM = `You are the Navigation Task Agent for a web data collection pipeline.

Expand All @@ -14,8 +23,9 @@ The agent must navigate the site and return structured JSON with extracted data

Rules:
- Be specific about what to click, search, filter, or paginate.
- State the exact JSON shape to return: { "records": [ { column_name: value, ... } ] }
- State the exact JSON shape to return: { "records": [ { column_name: value, ... } ], "agent_browser_actions": [ ... ] }
- Include column names from the schema in the goal.
- Include the browser action reporting contract verbatim enough that the Tinyfish Agent knows it must report replay-oriented actions.
- For forms: describe fields to fill and how to submit.
- For detail follow-up: explain how to open each item and which fields to collect.
- Limit scope (e.g. first 25 rows) to keep runs reliable.
Expand All @@ -31,34 +41,45 @@ export async function generateAgentGoal(options: {
focusFields?: string[];
memory?: WorkflowMemory;
}): Promise<AgentGoal> {
const columnList = options.spec.columns
.map((c) => `${c.name} (${c.type}${c.required ? ", required" : ""})`)
.join(", ");

return completeJson({
label: `agent_goal:${options.triage.final_url}`,
schema: agentGoalSchema,
messages: [
{ role: "system", content: AGENT_GOAL_SYSTEM },
{
role: "user",
content: JSON.stringify({
user_prompt: options.userPrompt,
triage_status: options.triage.status,
triage_reasoning: options.triage.reasoning,
suggested_action: options.triage.suggested_action,
page_url: options.triage.final_url,
page_title: options.triage.title,
row_grain: options.spec.row_grain,
columns: columnList,
focus_fields: options.focusFields ?? [],
extraction_hints: options.spec.extraction_hints,
workflow_memory: options.memory
? memoryContextForAgents(options.memory)
: undefined,
output_shape: { goal: "string", rationale: "string" },
}),
},
],
messages: buildAgentGoalMessages(options),
});
}

/**
 * Assembles the chat messages sent to the LLM when asking it to write a
 * navigation goal for the browsing agent.
 *
 * The result is always two messages: the `AGENT_GOAL_SYSTEM` system prompt,
 * followed by a user message whose content is a JSON-serialized payload built
 * from the prompt, triage result, dataset spec, and optional workflow memory.
 *
 * @param options.userPrompt - Original user request, forwarded as `user_prompt`.
 * @param options.spec - Dataset spec; its columns are flattened into one
 *   comma-separated `name (type[, required])` string.
 * @param options.triage - Triage result supplying status, reasoning, suggested
 *   action, final URL, and page title.
 * @param options.focusFields - Optional field names; serialized as `[]` when absent.
 * @param options.memory - Optional workflow memory; when absent the
 *   `workflow_memory` key is `undefined` and therefore dropped by `JSON.stringify`.
 * @returns The `[system, user]` message pair.
 */
export function buildAgentGoalMessages(options: {
  userPrompt: string;
  spec: DatasetSpec;
  triage: SourceTriageResult;
  focusFields?: string[];
  memory?: WorkflowMemory;
}): LlmMessage[] {
  const { userPrompt, spec, triage, focusFields, memory } = options;

  // One human-readable descriptor per column, e.g. "price (number, required)".
  const columnDescriptors = spec.columns.map((column) => {
    const requiredSuffix = column.required ? ", required" : "";
    return `${column.name} (${column.type}${requiredSuffix})`;
  });

  // Key order is kept stable because it determines the serialized prompt text.
  const payload = {
    user_prompt: userPrompt,
    triage_status: triage.status,
    triage_reasoning: triage.reasoning,
    suggested_action: triage.suggested_action,
    page_url: triage.final_url,
    page_title: triage.title,
    row_grain: spec.row_grain,
    columns: columnDescriptors.join(", "),
    focus_fields: focusFields ?? [],
    extraction_hints: spec.extraction_hints,
    browser_action_reporting_contract: AGENT_BROWSER_ACTION_CONTRACT,
    workflow_memory: memory ? memoryContextForAgents(memory) : undefined,
    output_shape: { goal: "string", rationale: "string" },
  };

  const systemMessage: LlmMessage = {
    role: "system",
    content: AGENT_GOAL_SYSTEM,
  };
  const userMessage: LlmMessage = {
    role: "user",
    content: JSON.stringify(payload),
  };

  return [systemMessage, userMessage];
}
29 changes: 27 additions & 2 deletions backend/BigSet_Data_Collection_Agent/src/agents/source-policy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import type {
SourceTriageResult,
} from "../models/schemas.js";
import { scoreDocsUrlForOfficialSource } from "../records/source-urls.js";
import { getDomain } from "../utils/url.js";
import { getDomain, normalizeUrl } from "../utils/url.js";

export interface PromptSourceEntity {
name: string;
Expand All @@ -17,6 +17,7 @@ export interface PromptSourcePolicy {
requiresOfficialSource: boolean;
entities: PromptSourceEntity[];
searchPhrases: string[];
explicitSourceUrls: string[];
hint?: string;
}

Expand Down Expand Up @@ -55,6 +56,14 @@ function uniqueStrings(values: string[]): string[] {
return [...new Set(values.map((value) => value.trim()).filter(Boolean))];
}

/**
 * Collects the http(s) URLs written literally in a prompt.
 *
 * Each match has trailing sentence punctuation (`. , ; : ! ?`) stripped, is
 * passed through `normalizeUrl`, and the resulting list is de-duplicated via
 * `uniqueStrings`.
 *
 * @param prompt - Free-form prompt text to scan.
 * @returns Normalized, de-duplicated URLs.
 */
function extractPromptSourceUrls(prompt: string): string[] {
  const urlPattern = /https?:\/\/[^\s)"'<>]+/gi;
  const trailingPunctuation = /[.,;:!?]+$/g;

  const normalizedUrls: string[] = [];
  for (const match of prompt.matchAll(urlPattern)) {
    const rawUrl = match[0] ?? "";
    // Drop punctuation that belongs to the surrounding sentence, not the URL.
    const trimmedUrl = rawUrl.replace(trailingPunctuation, "");
    normalizedUrls.push(normalizeUrl(trimmedUrl));
  }

  return uniqueStrings(normalizedUrls);
}

function tokenize(value: string): string[] {
return value
.toLowerCase()
Expand Down Expand Up @@ -157,6 +166,7 @@ export function derivePromptSourcePolicy(prompt: string): PromptSourcePolicy {
const taskText = taskTextFromPrompt(prompt);
const entities = extractExplicitEntities(taskText);
const searchPhrases = searchPhrasesForPrompt(taskText);
const explicitSourceUrls = extractPromptSourceUrls(taskText);
const lower = taskText.toLowerCase();
const asksForCanonicalSource =
searchPhrases.length > 0 ||
Expand All @@ -182,7 +192,7 @@ export function derivePromptSourcePolicy(prompt: string): PromptSourcePolicy {
].join("\n")
: undefined;

return { requiresOfficialSource, entities, searchPhrases, hint };
return { requiresOfficialSource, entities, searchPhrases, explicitSourceUrls, hint };
}

export function promptSourceSearchQueries(policy: PromptSourcePolicy): string[] {
Expand Down Expand Up @@ -236,6 +246,7 @@ export function urlMatchesPromptSourcePolicy(
url: string,
policy: PromptSourcePolicy,
): boolean {
if (urlMatchesExplicitPromptSource(url, policy)) return true;
if (!policy.requiresOfficialSource) return true;
const domain = getDomain(url).toLowerCase();
if (GENERIC_HOSTED_DOMAIN.test(domain)) {
Expand All @@ -246,6 +257,17 @@ export function urlMatchesPromptSourcePolicy(
);
}

/**
 * Reports whether `url` is one of the URLs the user named explicitly in the
 * prompt, or lies beneath one of them.
 *
 * Both sides are passed through `normalizeUrl`. A URL matches when it equals
 * an explicit source URL exactly or starts with that source URL followed by
 * `/`.
 *
 * NOTE(review): if `normalizeUrl` keeps a trailing slash on the explicit URL,
 * the `${explicit}/` prefix would contain a double slash and sub-paths would
 * not match — confirm against `normalizeUrl`'s behavior.
 *
 * @param url - Candidate URL to test.
 * @param policy - Prompt source policy carrying `explicitSourceUrls`.
 * @returns `true` when the candidate matches any explicit source URL.
 */
function urlMatchesExplicitPromptSource(
  url: string,
  policy: PromptSourcePolicy,
): boolean {
  const candidate = normalizeUrl(url);

  for (const sourceUrl of policy.explicitSourceUrls) {
    const explicit = normalizeUrl(sourceUrl);
    if (candidate === explicit) {
      return true;
    }
    if (candidate.startsWith(`${explicit}/`)) {
      return true;
    }
  }

  return false;
}

function urlMatchesEntitySourcePolicy(
url: string,
entity: PromptSourceEntity,
Expand Down Expand Up @@ -361,6 +383,9 @@ export function recordMatchesPromptSourcePolicy(
if (urls.length === 0) {
return false;
}
if (urls.some((url) => urlMatchesExplicitPromptSource(url, policy))) {
return true;
}

return urls.some((url) => urlMatchesEntitySourcePolicy(url, entity, policy));
}
Expand Down
Loading