From d818ba38b8785e07f7ec983717502645aa1f7171 Mon Sep 17 00:00:00 2001 From: Edward Tran Date: Sat, 23 May 2026 08:35:30 +0700 Subject: [PATCH] Ask Agent to emit browser actions --- .../src/agents/agent-goal.ts | 75 ++++++++++++------- backend/test/agent-goal-contract.test.ts | 62 +++++++++++++++ benchmarks/dataset-agent/README.md | 4 +- docs/data-collection-agent-migration-plan.md | 5 ++ 4 files changed, 118 insertions(+), 28 deletions(-) create mode 100644 backend/test/agent-goal-contract.test.ts diff --git a/backend/BigSet_Data_Collection_Agent/src/agents/agent-goal.ts b/backend/BigSet_Data_Collection_Agent/src/agents/agent-goal.ts index e84ad75..7df2219 100644 --- a/backend/BigSet_Data_Collection_Agent/src/agents/agent-goal.ts +++ b/backend/BigSet_Data_Collection_Agent/src/agents/agent-goal.ts @@ -5,6 +5,15 @@ import { } from "../memory/index.js"; import { agentGoalSchema, type AgentGoal } from "../models/schemas.js"; import type { DatasetSpec, SourceTriageResult } from "../models/schemas.js"; +import type { LlmMessage } from "../integrations/openrouter.js"; + +export const AGENT_BROWSER_ACTION_CONTRACT = `Browser action reporting contract: +- The Tinyfish Agent result JSON MUST include "agent_browser_actions" next to "records". +- "agent_browser_actions" is an ordered array of browser steps the agent actually performed. +- Each action should use this shape when known: { "action": "navigate|click|type|select|wait|extract|screenshot|unknown", "url": "current page URL", "selector": "CSS selector when known", "target_text": "visible button/link/field text when known", "value_description": "safe description of typed/selected value, never secrets", "status": "succeeded|failed", "error": "failure reason if any", "phase": "initial|search|filter|pagination|detail|form|extract", "label": "short human label" }. +- Record navigation, clicks, form fills, pagination, waits that affected extraction, and final extraction. 
+- If a selector is unknown, still include url plus target_text when visible. If no browser action happened, return an empty array. +- Do not include raw passwords, tokens, cookies, or private user-entered values in value_description.`; const AGENT_GOAL_SYSTEM = `You are the Navigation Task Agent for a web data collection pipeline. @@ -14,8 +23,9 @@ The agent must navigate the site and return structured JSON with extracted data Rules: - Be specific about what to click, search, filter, or paginate. -- State the exact JSON shape to return: { "records": [ { column_name: value, ... } ] } +- State the exact JSON shape to return: { "records": [ { column_name: value, ... } ], "agent_browser_actions": [ ... ] } - Include column names from the schema in the goal. +- Include the browser action reporting contract verbatim enough that the Tinyfish Agent knows it must report replay-oriented actions. - For forms: describe fields to fill and how to submit. - For detail follow-up: explain how to open each item and which fields to collect. - Limit scope (e.g. first 25 rows) to keep runs reliable. @@ -31,34 +41,45 @@ export async function generateAgentGoal(options: { focusFields?: string[]; memory?: WorkflowMemory; }): Promise<AgentGoal> { - const columnList = options.spec.columns - .map((c) => `${c.name} (${c.type}${c.required ? ", required" : ""})`) - .join(", "); - return completeJson({ label: `agent_goal:${options.triage.final_url}`, schema: agentGoalSchema, - messages: [ - { role: "system", content: AGENT_GOAL_SYSTEM }, - { - role: "user", - content: JSON.stringify({ - user_prompt: options.userPrompt, - triage_status: options.triage.status, - triage_reasoning: options.triage.reasoning, - suggested_action: options.triage.suggested_action, - page_url: options.triage.final_url, - page_title: options.triage.title, - row_grain: options.spec.row_grain, - columns: columnList, - focus_fields: options.focusFields ?? 
[], - extraction_hints: options.spec.extraction_hints, - workflow_memory: options.memory - ? memoryContextForAgents(options.memory) - : undefined, - output_shape: { goal: "string", rationale: "string" }, - }), - }, - ], + messages: buildAgentGoalMessages(options), }); } + +export function buildAgentGoalMessages(options: { + userPrompt: string; + spec: DatasetSpec; + triage: SourceTriageResult; + focusFields?: string[]; + memory?: WorkflowMemory; +}): LlmMessage[] { + const columnList = options.spec.columns + .map((c) => `${c.name} (${c.type}${c.required ? ", required" : ""})`) + .join(", "); + + return [ + { role: "system", content: AGENT_GOAL_SYSTEM }, + { + role: "user", + content: JSON.stringify({ + user_prompt: options.userPrompt, + triage_status: options.triage.status, + triage_reasoning: options.triage.reasoning, + suggested_action: options.triage.suggested_action, + page_url: options.triage.final_url, + page_title: options.triage.title, + row_grain: options.spec.row_grain, + columns: columnList, + focus_fields: options.focusFields ?? [], + extraction_hints: options.spec.extraction_hints, + browser_action_reporting_contract: AGENT_BROWSER_ACTION_CONTRACT, + workflow_memory: options.memory + ? 
memoryContextForAgents(options.memory) + : undefined, + output_shape: { goal: "string", rationale: "string" }, + }), + }, + ]; +} diff --git a/backend/test/agent-goal-contract.test.ts b/backend/test/agent-goal-contract.test.ts new file mode 100644 index 0000000..bad81ef --- /dev/null +++ b/backend/test/agent-goal-contract.test.ts @@ -0,0 +1,62 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; + +import { + AGENT_BROWSER_ACTION_CONTRACT, + buildAgentGoalMessages, +} from "../BigSet_Data_Collection_Agent/src/agents/agent-goal.js"; + +test("Agent goal prompt requires producer-side browser action reporting", () => { + const messages = buildAgentGoalMessages({ + userPrompt: "Find SaaS pricing pages.", + spec: { + intent_summary: "Find pricing pages.", + target_row_count: 3, + row_grain: "company", + columns: [ + { + name: "company_name", + type: "string", + description: "Company name", + required: true, + }, + { + name: "pricing_url", + type: "string", + description: "Pricing page URL", + required: true, + }, + ], + dedupe_keys: ["company_name"], + search_queries: ["SaaS pricing"], + extraction_hints: "Prefer official pricing pages.", + }, + triage: { + url: "https://example.com", + final_url: "https://example.com/pricing", + title: "Pricing", + status: "requires_navigation", + confidence: 0.9, + source_data_confidence: 0.8, + expected_yield: "partial", + reasoning: "Needs click-through navigation.", + suggested_action: "Open pricing details.", + }, + }); + + const systemPrompt = messages.find((message) => message.role === "system") + ?.content ?? ""; + const userPayload = JSON.parse( + messages.find((message) => message.role === "user")?.content ?? 
"{}" + ); + + assert.match(systemPrompt, /agent_browser_actions/); + assert.match(systemPrompt, /records/); + assert.match(AGENT_BROWSER_ACTION_CONTRACT, /selector/); + assert.match(AGENT_BROWSER_ACTION_CONTRACT, /target_text/); + assert.match(AGENT_BROWSER_ACTION_CONTRACT, /value_description/); + assert.equal( + userPayload.browser_action_reporting_contract, + AGENT_BROWSER_ACTION_CONTRACT + ); +}); diff --git a/benchmarks/dataset-agent/README.md b/benchmarks/dataset-agent/README.md index 0d7d8f3..7f0f0a5 100644 --- a/benchmarks/dataset-agent/README.md +++ b/benchmarks/dataset-agent/README.md @@ -98,7 +98,9 @@ descriptions before any `playwright-candidate-script` can be emitted. Collection runners can feed those actions through explicit report fields such as `browser_actions` or `agent_browser_actions`. BigSet maps only those explicit actions into `browser` trace steps; it does not infer selectors or clicks from -URLs, source outcomes, or prose diagnostics. +URLs, source outcomes, or prose diagnostics. The collection TinyFish Agent goal +now explicitly asks the Agent to return `agent_browser_actions` next to +`records`, so browser replay evidence starts at the producer contract. Mapping is mechanical: diff --git a/docs/data-collection-agent-migration-plan.md b/docs/data-collection-agent-migration-plan.md index 55becb9..593bd3f 100644 --- a/docs/data-collection-agent-migration-plan.md +++ b/docs/data-collection-agent-migration-plan.md @@ -280,6 +280,11 @@ order and appends `browser_actions` before `agent_browser_actions` when both are present in the same report scope. This is a wrapper ingestion contract only; the current vendored pipeline is not claimed to emit those fields yet. +The TinyFish Agent goal generator now asks the Agent itself to include +`agent_browser_actions` beside `records` in its result JSON. 
That makes the +producer responsible for ordered navigation/click/type/extract actions instead +of asking the self-healing layer to infer browser behavior after the fact. + If TinyFish Agent result JSON includes explicit `browser_actions` or `agent_browser_actions`, the vendored runner now carries those arrays into the saved Agent run records and phase-scoped run report fields. Generic `actions`