Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/quick-games-try.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Add structured output to agent result + ensure close tool is always called
4 changes: 0 additions & 4 deletions packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ function buildToolsSection(
{ name: "wait", description: "Wait for a specified time" },
{ name: "navback", description: "Navigate back in browser history" },
{ name: "scroll", description: "Scroll the page x pixels up or down" },
{ name: "close", description: "Mark the task as complete or failed" },
Copy link
Member

@pirate pirate Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
{ name: "close", description: "Mark the task as complete or failed" },
{ name: "close", description: "Mark the task as complete or failed" }, // TODO: consider renaming this tool to "done"

];

const domTools: ToolDefinition[] = [
Expand All @@ -92,7 +91,6 @@ function buildToolsSection(
{ name: "wait", description: "Wait for a specified time" },
{ name: "navback", description: "Navigate back in browser history" },
{ name: "scroll", description: "Scroll the page x pixels up or down" },
{ name: "close", description: "Mark the task as complete or failed" },
];

const baseTools = isHybridMode ? hybridTools : domTools;
Expand Down Expand Up @@ -224,8 +222,6 @@ export function buildAgentSystemPrompt(
<item>Always start by understanding the current page state</item>
<item>Use the screenshot tool to verify page state when needed</item>
<item>Use appropriate tools for each action</item>
<item>When the task is complete, use the "close" tool with taskComplete: true</item>
<item>If the task cannot be completed, use "close" with taskComplete: false</item>
</guidelines>
${pageUnderstandingProtocol}
<navigation>
Expand Down
16 changes: 0 additions & 16 deletions packages/core/lib/v3/agent/tools/close.ts

This file was deleted.

4 changes: 1 addition & 3 deletions packages/core/lib/v3/agent/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { actTool } from "./act";
import { screenshotTool } from "./screenshot";
import { waitTool } from "./wait";
import { navBackTool } from "./navback";
import { closeTool } from "./close";
import { ariaTreeTool } from "./ariaTree";
import { fillFormTool } from "./fillform";
import { scrollTool, scrollVisionTool } from "./scroll";
Expand Down Expand Up @@ -87,7 +86,7 @@ export function createAgentTools(v3: V3, options?: V3AgentToolOptions) {
ariaTree: ariaTreeTool(v3),
click: clickTool(v3, provider),
clickAndHold: clickAndHoldTool(v3, provider),
close: closeTool(),
//close: closeTool(),
dragAndDrop: dragAndDropTool(v3, provider),
extract: extractTool(v3, executionModel, options?.logger),
fillForm: fillFormTool(v3, executionModel),
Expand Down Expand Up @@ -121,7 +120,6 @@ export type AgentToolTypesMap = {
ariaTree: ReturnType<typeof ariaTreeTool>;
click: ReturnType<typeof clickTool>;
clickAndHold: ReturnType<typeof clickAndHoldTool>;
close: ReturnType<typeof closeTool>;
dragAndDrop: ReturnType<typeof dragAndDropTool>;
extract: ReturnType<typeof extractTool>;
fillForm: ReturnType<typeof fillFormTool>;
Expand Down
128 changes: 128 additions & 0 deletions packages/core/lib/v3/agent/utils/handleCloseToolCall.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { generateText, ModelMessage, LanguageModel, ToolSet } from "ai";
import { z } from "zod";
import { tool } from "ai";
import { LogLine } from "../../types/public/logs";
import { StagehandZodObject } from "../../zodCompat";
interface CloseResult {
reasoning: string;
taskComplete: boolean;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think `success: true | false` may be a better name for this field.

messages: ModelMessage[];
output?: Record<string, unknown>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend making `output` required; LLMs are really good at inferring what the ideal output should be for a task.

E.g., if the user is researching something, it often nails it and puts the exact data they were looking for in `output`.

}

/**
 * Fields every forced "close" tool call must carry: a short summary of the
 * run (`reasoning`) and a completion flag (`taskComplete`).
 */
const baseCloseShape = {
  reasoning: z
    .string()
    .describe("Brief summary of what actions were taken and the outcome"),
  taskComplete: z
    .boolean()
    .describe("true if the task was fully completed, false otherwise"),
};

/** Zod object schema built from the base close fields. */
const baseCloseSchema = z.object(baseCloseShape);

/**
 * Force a close tool call at the end of an agent run.
 *
 * Issues one additional `generateText` request in which the only available
 * tool is `close` and `toolChoice` requires it, so the caller always gets a
 * structured final response, even if the main loop ended without calling close.
 *
 * @param options.model - Language model used for the final request.
 * @param options.inputMessages - Conversation history passed to the model
 *   ahead of the final-assessment user prompt.
 * @param options.instruction - The original task text; embedded in the system prompt.
 * @param options.outputSchema - Optional schema for user-requested data. When
 *   given, it is added to the close tool input as an `output` field and its
 *   field descriptions are listed in the system prompt.
 * @param options.logger - Sink for progress log lines.
 * @returns The model's `reasoning` and `taskComplete` values, the `output`
 *   field of the tool input (if any), and the messages produced by this step
 *   (the final user prompt followed by the response messages). If the result
 *   contains no `close` tool call, `taskComplete` is `false` and `reasoning`
 *   falls back to the response text or a generic message.
 */
export async function handleCloseToolCall(options: {
  model: LanguageModel;
  inputMessages: ModelMessage[];
  instruction: string;
  outputSchema?: StagehandZodObject;
  logger: (message: LogLine) => void;
}): Promise<CloseResult> {
  const { model, inputMessages, instruction, outputSchema, logger } = options;

  logger({
    category: "agent",
    message: "Agent calling tool: close",
    level: 1,
  });

  // Merge base close schema with user-provided output schema if present
  const closeToolSchema = outputSchema
    ? baseCloseSchema.extend({
        output: outputSchema.describe(
          "The specific data the user requested from this task",
        ),
      })
    : baseCloseSchema;

  // Describe each requested output field (key -> description) for the prompt.
  const outputInstructions = outputSchema
    ? `\n\nThe user also requested the following information from this task. Provide it in the "output" field:\n${JSON.stringify(
        Object.fromEntries(
          Object.entries(outputSchema.shape).map(([key, value]) => [
            key,
            value.description || "no description",
          ]),
        ),
        null,
        2,
      )}`
    : "";

  const systemPrompt = `You are a web automation assistant that was tasked with completing a task.
The task was:
"${instruction}"
Review what was accomplished and provide your final assessment in whether the task was completed successfully. you have been provided with the history of the actions taken so far, use this to determine if the task was completed successfully.${outputInstructions}
Call the "close" tool with:
1. A brief summary of what was done
2. Whether the task was completed successfully${outputSchema ? "\n3. The requested output data based on what you found" : ""}`;

  const closeTool = tool({
    description: outputSchema
      ? "Complete the task with your assessment and the requested output data."
      : "Complete the task with your final assessment.",
    inputSchema: closeToolSchema,
    execute: async (params) => {
      return { success: true, ...params };
    },
  });

  const userPrompt: ModelMessage = {
    role: "user",
    content: outputSchema
      ? "Provide your final assessment and the requested output data."
      : "Provide your final assessment.",
  };

  const result = await generateText({
    model,
    system: systemPrompt,
    messages: [...inputMessages, userPrompt],
    tools: { close: closeTool } as ToolSet,
    toolChoice: { type: "tool", toolName: "close" },
  });

  const closeToolCall = result.toolCalls.find((tc) => tc.toolName === "close");
  const outputMessages: ModelMessage[] = [
    userPrompt,
    ...(result.response?.messages ?? []),
  ];

  if (!closeToolCall) {
    // Surface the fallback instead of returning silently, so an unexpected
    // missing tool call can be diagnosed from the logs.
    logger({
      category: "agent",
      message:
        "Model did not return a close tool call; reporting task as incomplete",
      level: 1,
    });
    return {
      // `||` is intentional: an empty response text should also fall back.
      reasoning: result.text || "Task execution completed",
      taskComplete: false,
      messages: outputMessages,
    };
  }

  const input = closeToolCall.input as z.infer<typeof baseCloseSchema> & {
    output?: Record<string, unknown>;
  };

  // Log the outcome the model actually reported rather than always claiming
  // completion.
  logger({
    category: "agent",
    message: input.taskComplete ? "Task completed" : "Task not completed",
    level: 1,
  });

  return {
    reasoning: input.reasoning,
    taskComplete: input.taskComplete,
    messages: outputMessages,
    output: input.output,
  };
}
10 changes: 8 additions & 2 deletions packages/core/lib/v3/agent/utils/validateExperimentalFeatures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ export interface AgentValidationOptions {
* Validates agent configuration and experimental feature usage.
*
* This utility consolidates all validation checks for both CUA and non-CUA agent paths:
* - Invalid argument errors for CUA (streaming, abort signal, message continuation, excludeTools are not supported)
* - Invalid argument errors for CUA (streaming, abort signal, message continuation, excludeTools, output schema are not supported)
* - Experimental feature checks for integrations and tools (both CUA and non-CUA)
* - Experimental feature checks for hybrid mode (requires experimental: true)
* - Experimental feature checks for non-CUA only (callbacks, signal, messages, streaming, excludeTools)
* - Experimental feature checks for non-CUA only (callbacks, signal, messages, streaming, excludeTools, output schema)
*
* Throws StagehandInvalidArgumentError for invalid/unsupported configurations.
* Throws ExperimentalNotConfiguredError if experimental features are used without experimental mode.
Expand Down Expand Up @@ -56,6 +56,9 @@ export function validateExperimentalFeatures(
) {
unsupportedFeatures.push("excludeTools");
}
if (executeOptions?.output) {
unsupportedFeatures.push("output schema");
}

if (unsupportedFeatures.length > 0) {
throw new StagehandInvalidArgumentError(
Expand Down Expand Up @@ -97,6 +100,9 @@ export function validateExperimentalFeatures(
if (executeOptions.excludeTools && executeOptions.excludeTools.length > 0) {
features.push("excludeTools");
}
if (executeOptions.output) {
features.push("output schema");
}
}

if (features.length > 0) {
Expand Down
Loading
Loading