tinyfish-io · giaphutran12 · May 22, 2026
diff --git a/backend/src/pipeline/collection-agent-runner.ts b/backend/src/pipeline/collection-agent-runner.ts
@@ -10,6 +10,7 @@ import type {
 import {
   populateProcessTraceFromSteps,
   type PopulateCellValue,
+  type PopulateRuntimeBrowserAction,
   type PopulateRuntimeResult,
   type PopulateRuntimeTraceStep,
 } from "./populate-runtime.js";
@@ -51,6 +52,8 @@ interface CollectionPipelineResult {
       search_queries?: string[];
       fetched_urls?: string[];
       failed_urls?: string[];
+      browser_actions?: CollectionBrowserActionReport[];
+      agent_browser_actions?: CollectionBrowserActionReport[];
     };
     repair?: {
       stats?: CollectionPhaseStats;
@@ -59,6 +62,8 @@ interface CollectionPipelineResult {
     search_queries?: string[];
     fetched_urls?: string[];
     failed_urls?: string[];
+    browser_actions?: CollectionBrowserActionReport[];
+    agent_browser_actions?: CollectionBrowserActionReport[];
     quality?: {
       records?: CollectionRecordQuality[];
     };
@@ -124,9 +129,25 @@ interface CollectionSourceOutcome {
 interface CollectionRepairLoopReport { // Per-repair-loop entry of the collection report; every field is optional.
   loop_index?: number; // Feeds the default trace phase `repair-loop-<loop_index>` ("unknown" when absent).
   repair_queries?: string[];
+  browser_actions?: CollectionBrowserActionReport[]; // Explicit browser actions; converted to browser trace steps first.
+  agent_browser_actions?: CollectionBrowserActionReport[]; // Second explicit action list; appended after browser_actions in the trace.
   stats?: CollectionPhaseStats;
 }
 
+interface CollectionBrowserActionReport { // One explicit browser action from a collection report; accepts snake_case and camelCase aliases.
+  action?: string; // Free-form name; trimmed and lower-cased, unrecognized values become "unknown".
+  url?: string;
+  selector?: string;
+  target_text?: string; // snake_case alias; used only when targetText is null/undefined.
+  targetText?: string; // camelCase alias; takes precedence over target_text.
+  value_description?: string; // snake_case alias; used only when valueDescription is null/undefined.
+  valueDescription?: string; // camelCase alias; takes precedence over value_description.
+  status?: string; // "failed"/"error" -> failed, "skipped" -> skipped, anything else (or missing) -> succeeded.
+  error?: string; // Copied unchanged onto the trace step's error field.
+  phase?: string; // Overrides the caller-supplied default phase in the trace step input.
+  label?: string; // Overrides the generated `collection-browser-<action>` step label.
+}
+
 const AGENT_REQUIRED_TRIAGE_STATUSES = new Set([
   "requires_navigation",
   "requires_form_submission",
@@ -312,8 +333,25 @@ function collectionProcessTrace(input: {
         },
       });
     }
+    steps.push(...browserTraceStepsFromReports({
+      reports: [
+        ...(loop.browser_actions ?? []),
+        ...(loop.agent_browser_actions ?? []),
+      ],
+      defaultPhase: `repair-loop-${loop.loop_index ?? "unknown"}`,
+    }));
   }
 
+  steps.push(...browserTraceStepsFromReports({
+    reports: [
+      ...(report.browser_actions ?? []),
+      ...(report.agent_browser_actions ?? []),
+      ...(report.initial?.browser_actions ?? []),
+      ...(report.initial?.agent_browser_actions ?? []),
+    ],
+    defaultPhase: "initial",
+  }));
+
   for (const outcome of report.sources?.outcomes ?? []) {
     if (!outcome.url) {
       continue;
@@ -358,6 +396,92 @@ function collectionDebugNotes(report: CollectionPipelineResult["report"]): strin
   return notes;
 }
 
+function browserTraceStepsFromReports(input: { // Converts action reports into browser trace steps, dropping reports that yield no step.
+  reports: CollectionBrowserActionReport[]; // Processed in array order; surviving steps keep that order.
+  defaultPhase: string; // Phase recorded for reports that do not carry their own `phase`.
+}): PopulateRuntimeTraceStep[] {
+  return input.reports
+    .map((report) => browserTraceStepFromReport({
+      report,
+      defaultPhase: input.defaultPhase,
+    }))
+    .filter((step): step is PopulateRuntimeTraceStep => Boolean(step)); // Strip the `undefined` results of non-actionable reports.
+}
+
+function browserTraceStepFromReport(input: { // Builds one `browser` trace step, or undefined when the report has no url, selector, or target text.
+  report: CollectionBrowserActionReport;
+  defaultPhase: string; // Used for `input.phase` only when the report has no `phase` of its own.
+}): PopulateRuntimeTraceStep | undefined {
+  const browserAction = browserActionFromReport(input.report);
+  if (!browserAction) {
+    return undefined;
+  }
+
+  return {
+    kind: "browser",
+    label: input.report.label ?? `collection-browser-${browserAction.action}`, // Fallback label uses the normalized action kind, not the raw string.
+    status: browserActionTraceStatus(input.report.status),
+    input: {
+      url: browserAction.url,
+      selector: browserAction.selector,
+      targetText: browserAction.targetText,
+      phase: input.report.phase ?? input.defaultPhase,
+    },
+    error: input.report.error, // Passed through as-is, independent of the computed status.
+    browserAction,
+  };
+}
+
+function browserActionFromReport( // Normalizes a report into a PopulateRuntimeBrowserAction, or undefined when it names nothing to act on.
+  report: CollectionBrowserActionReport
+): PopulateRuntimeBrowserAction | undefined {
+  const action = browserActionKind(report.action);
+  const targetText = report.targetText ?? report.target_text; // camelCase wins; snake_case is the fallback.
+  const valueDescription =
+    report.valueDescription ?? report.value_description; // Same alias precedence as targetText.
+  if (!report.url && !report.selector && !targetText) { // Falsy check: empty strings count as missing; valueDescription alone does not qualify.
+    return undefined;
+  }
+  return {
+    action,
+    url: report.url,
+    selector: report.selector,
+    targetText,
+    valueDescription,
+  };
+}
+
+function browserActionKind( // Maps a reported action name onto the recognized action kinds; everything else becomes "unknown".
+  value: string | undefined
+): PopulateRuntimeBrowserAction["action"] {
+  const normalized = value?.trim().toLowerCase(); // Comparison ignores surrounding whitespace and letter case.
+  if (
+    normalized === "navigate" ||
+    normalized === "click" ||
+    normalized === "type" ||
+    normalized === "select" ||
+    normalized === "wait" ||
+    normalized === "extract" ||
+    normalized === "screenshot"
+  ) {
+    return normalized;
+  }
+  return "unknown"; // Also reached when `value` is undefined or empty.
+}
+
+function browserActionTraceStatus( // Collapses a free-form report status into "failed", "skipped", or "succeeded".
+  value: string | undefined
+): PopulateRuntimeTraceStep["status"] {
+  const normalized = value?.trim().toLowerCase(); // Comparison ignores surrounding whitespace and letter case.
+  if (normalized === "failed" || normalized === "error") {
+    return "failed";
+  }
+  if (normalized === "skipped") {
+    return "skipped";
+  }
+  return "succeeded"; // Default for a missing status and for any string not matched above.
+}
+
 function sourceOutcomeTraceKind(outcome: CollectionSourceOutcome): PopulateRuntimeTraceStep["kind"] {
   if (outcome.outcome?.startsWith("agent_")) {
     return "agent";

diff --git a/backend/test/collection-agent-runner.test.ts b/backend/test/collection-agent-runner.test.ts
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
 import { test } from "node:test";
 
 import { runCollectionPopulatePipeline } from "../src/pipeline/collection-agent-runner.js";
+import { playwrightCandidateReadinessForRun } from "../src/pipeline/populate-playwright-readiness.js";
 
 test("collection agent runner maps vendored pipeline output into populate runtime result", async () => {
   const previousEnv = snapshotEnv([
@@ -53,6 +54,77 @@ test("collection agent runner maps vendored pipeline output into populate runtim
       ),
       true
     );
+    assert.equal(
+      result.debug?.processTrace.steps.some((step) => step.kind === "browser"),
+      false
+    );
+  } finally {
+    restoreEnv(previousEnv);
+  }
+});
+
+test("collection agent runner maps explicit browser action reports into process trace", async () => {
+  const previousEnv = snapshotEnv([
+    "AGENT_POLL_TIMEOUT_MS",
+    "COLLECTION_AGENT_ENABLE_AGENT",
+    "COLLECTION_AGENT_PIPELINE_MODULE",
+    "COLLECTION_AGENT_POLL_TIMEOUT_MS",
+  ]);
+  delete process.env.AGENT_POLL_TIMEOUT_MS;
+  process.env.COLLECTION_AGENT_ENABLE_AGENT = "true";
+  delete process.env.COLLECTION_AGENT_POLL_TIMEOUT_MS;
+  process.env.COLLECTION_AGENT_PIPELINE_MODULE = fakeCollectionPipelineModuleUrl({
+    expectedCalls: [{ agentEnabled: true, pollTimeoutMs: 480_000 }],
+    browserActions: [
+      {
+        action: "hover",
+        url: "https://openai.com/news",
+        status: "succeeded",
+        phase: "initial-browser",
+        label: "browser-open-news",
+      },
+    ],
+    agentBrowserActions: [
+      {
+        action: "click",
+        url: "https://openai.com/news",
+        selector: "a[href*='/news/']",
+        target_text: "Release notes",
+        value_description: "not captured",
+        status: "succeeded",
+      },
+    ],
+  });
+  try {
+    const result = await runCollectionPopulatePipeline(collectionPipelineInput());
+    const browserSteps = result.debug?.processTrace.steps.filter(
+      (step) => step.kind === "browser"
+    ) ?? [];
+
+    assert.equal(browserSteps.length, 2);
+    assert.equal(browserSteps[0]?.browserAction?.action, "unknown");
+    assert.equal(browserSteps[0]?.label, "browser-open-news");
+    assert.deepEqual(browserSteps[0]?.input, {
+      url: "https://openai.com/news",
+      selector: undefined,
+      targetText: undefined,
+      phase: "initial-browser",
+    });
+    assert.equal(browserSteps[0]?.error, undefined);
+    assert.equal(browserSteps[1]?.browserAction?.action, "click");
+    assert.equal(browserSteps[1]?.browserAction?.selector, "a[href*='/news/']");
+    assert.equal(browserSteps[1]?.browserAction?.targetText, "Release notes");
+    assert.equal(browserSteps[1]?.browserAction?.valueDescription, "not captured");
+    assert.equal(browserSteps[1]?.status, "succeeded");
+    assert.deepEqual(
+      playwrightCandidateReadinessForRun({ result }),
+      {
+        status: "ready",
+        reasons: [],
+        browserStepCount: 2,
+        sourceUrlCount: 2,
+      }
+    );
   } finally {
     restoreEnv(previousEnv);
   }
@@ -182,6 +254,8 @@ function fakeCollectionPipelineModuleUrl(input: {
     pollTimeoutMs?: number;
   }>;
   sources?: unknown;
+  browserActions?: unknown;
+  agentBrowserActions?: unknown;
 }): string {
   const source = `
     const moduleLoadPollTimeoutMs = process.env.AGENT_POLL_TIMEOUT_MS ?? null;
@@ -275,6 +349,8 @@ function fakeCollectionPipelineModuleUrl(input: {
             "OpenAI latest AI blog posts",
             "OpenAI release notes",
           ],
+          browser_actions: ${JSON.stringify(input.browserActions ?? [])},
+          agent_browser_actions: ${JSON.stringify(input.agentBrowserActions ?? [])},
           fetched_urls: [
             "https://openai.com/news",
             "https://openai.com/research",

diff --git a/benchmarks/dataset-agent/README.md b/benchmarks/dataset-agent/README.md
@@ -88,6 +88,26 @@ and fetch URLs alone are not enough. The readiness gate expects real browser
 actions such as URL transitions, selectors, target text, or redacted input
 descriptions before any `playwright-candidate-script` can be emitted.
 
+Collection runners can feed those actions through explicit report fields such
+as `browser_actions` or `agent_browser_actions`. BigSet maps only those explicit
+actions into `browser` trace steps; it does not infer selectors or clicks from
+URLs, source outcomes, or prose diagnostics. A report that has no `url`, `selector`, or target text is skipped.
+
+Mapping is mechanical:
+
+- `target_text` / `targetText` -> `browserAction.targetText`
+- `value_description` / `valueDescription` -> `browserAction.valueDescription`
+- `status` -> `step.status`
+- `error` -> `step.error`
+- `phase` -> `step.input.phase` (falls back to `initial` or `repair-loop-<loop_index>` when the report omits it)
+- unknown action strings -> `browserAction.action = "unknown"`
+
+When both action arrays are present in the same report scope, BigSet preserves
+array order by appending `browser_actions` first and `agent_browser_actions`
+second. This is an ingestion contract for a future Meteor/Mengzhe producer or
+Agent canary; it does not mean the current vendored pipeline already emits
+browser actions.
+
 ## Verify Self-Healing Stack
 
 Use this before asking someone else to migrate a new collection agent into the

diff --git a/docs/data-collection-agent-migration-plan.md b/docs/data-collection-agent-migration-plan.md
@@ -97,6 +97,13 @@ The current layer now can:
 - represent browser actions in the trace contract when a future Agent/canary
   records URL transitions, selectors, target text, or redacted input
   descriptions
+- ingest explicit collection runner `browser_actions` /
+  `agent_browser_actions` report fields into `browser` trace steps without
+  inferring missing clicks, selectors, or form inputs from source URLs
+- map browser action reports mechanically: `target_text` to `targetText`,
+  `value_description` to `valueDescription`, `status` to the trace-step status,
+  `error` to the trace-step error, `phase` to `step.input.phase`, and unknown
+  action names to `browserAction.action = "unknown"`
 - emit a capability diagnostic when no-Agent mode sees pages that need browser,
   form, or detail-page follow-up
 
@@ -111,6 +118,8 @@ The current layer does not yet:
 - compile search/fetch-only traces into Playwright; traces must include
   actionable browser steps before the script compiler is allowed to emit a
   candidate
+- infer browser selectors, clicks, or form values from source outcomes; the
+  collection runner or Agent canary must emit those as explicit action fields
 - run a green live Convex canary in this local environment
 - prove Agent-enabled collection quality on a full real benchmark
 - prove the collection runtime should replace Mastra as the default app runtime
@@ -177,6 +186,8 @@ The current layer does not yet:
    - browser-step trace canary that records URL transitions, selectors/targets,
      and redacted form-input descriptions before any Playwright compiler is
      enabled
+   - confirm the canary emits explicit `agent_browser_actions` or equivalent
+     fields in the collection report; source outcomes alone are not enough
    - full benchmark only after the 2-prompt run is not obviously broken
    - live `--dataset-id` dry-run only after Convex/env prerequisites are ready
    - `--commit` only on a throwaway dataset first
@@ -233,6 +244,12 @@ collection runner ignores `recipeInstructions`, repaired recipes cannot change
 future behavior. If it ignores `requiredColumns` or benchmark metadata, the
 benchmark can stop measuring the same task.
 
+For the Playwright handoff, Meteor can optionally emit `browser_actions` and
+`agent_browser_actions` in the collection report. BigSet preserves each array's
+order and appends `browser_actions` before `agent_browser_actions` when both are
+present in the same report scope. This is a wrapper ingestion contract only; the
+current vendored pipeline is not claimed to emit those fields yet.
+
 The real benchmark command after a runner module exists is:
 
 ```bash