Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/sixty-apes-crash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Add focusable map to preserve interactive elements
3 changes: 3 additions & 0 deletions packages/core/lib/v3/types/private/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export type SessionDomIndex = {
absByBe: Map<number, string>;
tagByBe: Map<number, string>;
scrollByBe: Map<number, boolean>;
focusableByBe: Map<number, boolean>;
docRootOf: Map<number, number>;
contentDocRootByIframe: Map<number, number>;
};
Expand All @@ -56,6 +57,7 @@ export type FrameDomMaps = {
tagNameMap: Record<string, string>;
xpathMap: Record<string, string>;
scrollableMap: Record<string, boolean>;
focusableMap: Record<string, boolean>;
urlMap: Record<string, string>;
};

Expand Down Expand Up @@ -103,6 +105,7 @@ export type A11yOptions = {
experimental: boolean;
tagNameMap: Record<string, string>;
scrollableMap: Record<string, boolean>;
focusableMap: Record<string, boolean>;
encode: (backendNodeId: number) => string;
};

Expand Down
21 changes: 19 additions & 2 deletions packages/core/lib/v3/understudy/a11y/snapshot/a11yTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,15 @@ export async function buildHierarchicalTree(
const nodeMap = new Map<string, A11yNode>();

for (const n of nodes) {
const isFocusable = n.encodedId
? opts.focusableMap[n.encodedId] === true
: false;

const keep =
!!(n.name && n.name.trim()) ||
!!(n.childIds && n.childIds.length) ||
!isStructural(n.role);
!isStructural(n.role) ||
isFocusable;
if (!keep) continue;
nodeMap.set(n.nodeId, { ...n });
}
Expand All @@ -179,8 +184,20 @@ export async function buildHierarchicalTree(
async function pruneStructuralSafe(node: A11yNode): Promise<A11yNode | null> {
if (+node.nodeId < 0) return null;

const isFocusable = node.encodedId
? opts.focusableMap[node.encodedId] === true
: false;

const children = node.children ?? [];
if (!children.length) {
if (isFocusable) {
let newRole = node.role;
if ((newRole === "generic" || newRole === "none") && node.encodedId) {
const tagName = opts.tagNameMap[node.encodedId];
if (tagName) newRole = tagName;
}
return { ...node, role: newRole };
}
return isStructural(node.role) ? null : node;
}

Expand All @@ -190,7 +207,7 @@ export async function buildHierarchicalTree(

const prunedStatic = removeRedundantStaticTextChildren(node, cleanedKids);

if (isStructural(node.role)) {
if (isStructural(node.role) && !isFocusable) {
if (prunedStatic.length === 1) return prunedStatic[0]!;
if (prunedStatic.length === 0) return null;
}
Expand Down
27 changes: 19 additions & 8 deletions packages/core/lib/v3/understudy/a11y/snapshot/capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,14 @@ export async function tryScopedSnapshot(
const sameSessionAsParent =
!!parentId &&
ownerSession(page, parentId) === ownerSession(page, targetFrameId);
const { tagNameMap, xpathMap, scrollableMap } = await domMapsForSession(
owningSess,
targetFrameId,
pierce,
(fid, be) => `${page.getOrdinal(fid)}-${be}`,
sameSessionAsParent,
);
const { tagNameMap, xpathMap, scrollableMap, focusableMap } =
await domMapsForSession(
owningSess,
targetFrameId,
pierce,
(fid, be) => `${page.getOrdinal(fid)}-${be}`,
sameSessionAsParent,
);

const { outline, urlMap, scopeApplied } = await a11yForFrame(
owningSess,
Expand All @@ -184,6 +185,7 @@ export async function tryScopedSnapshot(
tagNameMap,
experimental: options?.experimental ?? false,
scrollableMap,
focusableMap,
encode: (backendNodeId) =>
`${page.getOrdinal(targetFrameId)}-${backendNodeId}`,
},
Expand Down Expand Up @@ -304,6 +306,7 @@ async function collectPerFrameMaps(
const tagNameMap: Record<string, string> = {};
const xpathMap: Record<string, string> = {};
const scrollableMap: Record<string, boolean> = {};
const focusableMap: Record<string, boolean> = {};
const enc = (be: number) => `${page.getOrdinal(frameId)}-${be}`;
const baseAbs = idx.absByBe.get(docRootBe) ?? "/";

Expand All @@ -318,17 +321,25 @@ async function collectPerFrameMaps(
const tag = idx.tagByBe.get(be);
if (tag) tagNameMap[key] = tag;
if (idx.scrollByBe.get(be)) scrollableMap[key] = true;
if (idx.focusableByBe.get(be)) focusableMap[key] = true;
}

const { outline, urlMap } = await a11yForFrame(sess, frameId, {
experimental: options?.experimental ?? false,
tagNameMap,
scrollableMap,
focusableMap,
encode: (backendNodeId) => `${page.getOrdinal(frameId)}-${backendNodeId}`,
});

perFrameOutlines.push({ frameId, outline });
perFrameMaps.set(frameId, { tagNameMap, xpathMap, scrollableMap, urlMap });
perFrameMaps.set(frameId, {
tagNameMap,
xpathMap,
scrollableMap,
focusableMap,
urlMap,
});
}

return { perFrameMaps, perFrameOutlines };
Expand Down
33 changes: 32 additions & 1 deletion packages/core/lib/v3/understudy/a11y/snapshot/domTree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,31 @@ import {
normalizeXPath,
} from "./xpathUtils";

/** Lower-cased tag names that are always reported as focusable, whatever their attributes. */
const NATIVE_FOCUSABLE_TAGS = new Set<string>(
  ["a", "button", "input", "select", "textarea", "details", "summary"],
);

/**
 * Decide whether a DOM node should be flagged as focusable/interactive.
 *
 * A node qualifies when either:
 *  - its lower-cased `nodeName` is one of `NATIVE_FOCUSABLE_TAGS`, or
 *  - it carries a `tabindex` attribute whose value parses (base 10) to an
 *    integer greater than or equal to zero.
 *
 * @param node - DOM node whose `nodeName` and `attributes` are inspected.
 * @returns `true` when the node matches one of the rules above, else `false`.
 */
export function isNodeFocusable(node: Protocol.DOM.Node): boolean {
  const tagName = String(node.nodeName ?? "").toLowerCase();
  if (NATIVE_FOCUSABLE_TAGS.has(tagName)) {
    return true;
  }

  // `attributes` is read as a flat, interleaved list: [name, value, name, value, ...].
  // `pos` always points at a value slot, so a trailing name with no value is never visited.
  const attrList = node.attributes ?? [];
  for (let pos = 1; pos < attrList.length; pos += 2) {
    const attrName = attrList[pos - 1];
    if (attrName?.toLowerCase() !== "tabindex") continue;

    // A value that does not parse yields NaN, and `NaN >= 0` is false, so
    // unparsable and negative tabindex values are both rejected here.
    const tabIndex = Number.parseInt(attrList[pos] ?? "", 10);
    if (tabIndex >= 0) return true;
  }

  return false;
}

// starting from infinite depth (-1), exponentially shrink down to 1
const DOM_DEPTH_ATTEMPTS = [-1, 256, 128, 64, 32, 16, 8, 4, 2, 1];
const DESCRIBE_DEPTH_ATTEMPTS = [-1, 64, 32, 16, 8, 4, 2, 1];
Expand Down Expand Up @@ -184,6 +209,7 @@ export async function domMapsForSession(
tagNameMap: Record<string, string>;
xpathMap: Record<string, string>;
scrollableMap: Record<string, boolean>;
focusableMap: Record<string, boolean>;
}> {
await session.send("DOM.enable").catch(() => {});
const root = await getDomTreeWithFallback(session, pierce);
Expand All @@ -210,6 +236,7 @@ export async function domMapsForSession(
const tagNameMap: Record<string, string> = {};
const xpathMap: Record<string, string> = {};
const scrollableMap: Record<string, boolean> = {};
const focusableMap: Record<string, boolean> = {};

type StackEntry = { node: Protocol.DOM.Node; xpath: string };
const stack: StackEntry[] = [{ node: startNode, xpath: "" }];
Expand All @@ -223,6 +250,7 @@ export async function domMapsForSession(
xpathMap[encId] = xpath || "/";
const isScrollable = node?.isScrollable === true;
if (isScrollable) scrollableMap[encId] = true;
if (isNodeFocusable(node)) focusableMap[encId] = true;
}

const kids = node.children ?? [];
Expand All @@ -246,7 +274,7 @@ export async function domMapsForSession(
}
}

return { tagNameMap, xpathMap, scrollableMap };
return { tagNameMap, xpathMap, scrollableMap, focusableMap };
}

/**
Expand All @@ -264,6 +292,7 @@ export async function buildSessionDomIndex(
const absByBe = new Map<number, string>();
const tagByBe = new Map<number, string>();
const scrollByBe = new Map<number, boolean>();
const focusableByBe = new Map<number, boolean>();
const docRootOf = new Map<number, number>();
const contentDocRootByIframe = new Map<number, number>();

Expand All @@ -277,6 +306,7 @@ export async function buildSessionDomIndex(
absByBe.set(node.backendNodeId, xp || "/");
tagByBe.set(node.backendNodeId, String(node.nodeName).toLowerCase());
if (node?.isScrollable === true) scrollByBe.set(node.backendNodeId, true);
if (isNodeFocusable(node)) focusableByBe.set(node.backendNodeId, true);
docRootOf.set(node.backendNodeId, docRootBe);
}

Expand Down Expand Up @@ -306,6 +336,7 @@ export async function buildSessionDomIndex(
absByBe,
tagByBe,
scrollByBe,
focusableByBe,
docRootOf,
contentDocRootByIframe,
};
Expand Down
6 changes: 6 additions & 0 deletions packages/core/tests/snapshot-a11y-resolvers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ describe("a11yForFrame", () => {
experimental: false,
tagNameMap: { "enc-100": "#document", "enc-101": "a" },
scrollableMap: {},
focusableMap: {},
encode: (backend) => `enc-${backend}`,
};

Expand Down Expand Up @@ -113,6 +114,7 @@ describe("a11yForFrame", () => {
experimental: false,
tagNameMap: { "enc-101": "a" },
scrollableMap: {},
focusableMap: {},
encode: (backend) => `enc-${backend}`,
};

Expand All @@ -137,6 +139,7 @@ describe("a11yForFrame", () => {
experimental: false,
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
encode: (backend) => `enc-${backend}`,
};

Expand Down Expand Up @@ -288,6 +291,7 @@ describe("tryScopedSnapshot", () => {
tagNameMap: { "1-10": "div" },
xpathMap: { "1-10": "/div[1]" },
scrollableMap: {},
focusableMap: {},
});
const a11ySpy = vi.spyOn(a11yTree, "a11yForFrame").mockResolvedValue({
outline: "[1-10] div",
Expand Down Expand Up @@ -321,6 +325,7 @@ describe("tryScopedSnapshot", () => {
tagNameMap: { "1-10": "div" },
xpathMap: { "1-10": "/div[1]" },
scrollableMap: {},
focusableMap: {},
});
vi.spyOn(a11yTree, "a11yForFrame").mockResolvedValue({
outline: "ignored",
Expand Down Expand Up @@ -356,6 +361,7 @@ describe("tryScopedSnapshot", () => {
tagNameMap: { "1-10": "div" },
xpathMap: { "1-10": "/div[1]" },
scrollableMap: {},
focusableMap: {},
});
vi.spyOn(a11yTree, "a11yForFrame").mockResolvedValue({
outline: "[1-10] div",
Expand Down
1 change: 1 addition & 0 deletions packages/core/tests/snapshot-a11y-tree-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const defaultOpts: A11yOptions = {
experimental: false,
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
encode: (backendNodeId: number) => `enc-${backendNodeId}`,
};

Expand Down
9 changes: 9 additions & 0 deletions packages/core/tests/snapshot-frame-merge.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ describe("computeFramePrefixes", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: { "0-200": "/html[1]/body[1]/iframe[1]" },
},
Expand Down Expand Up @@ -78,6 +79,7 @@ describe("computeFramePrefixes", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: { "0-200": "/iframe[1]" },
},
Expand All @@ -87,6 +89,7 @@ describe("computeFramePrefixes", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: { "1-300": "/div[1]/iframe[1]" },
},
Expand Down Expand Up @@ -125,6 +128,7 @@ describe("computeFramePrefixes", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: {},
},
Expand Down Expand Up @@ -166,6 +170,7 @@ describe("mergeFramesIntoSnapshot", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: { "0-10": "https://example.com" },
xpathMap: { "0-10": "/html[1]/body[1]" },
},
Expand All @@ -175,6 +180,7 @@ describe("mergeFramesIntoSnapshot", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: { "1-20": "https://child.com" },
xpathMap: { "1-20": "/div[1]/span[1]" },
},
Expand Down Expand Up @@ -226,6 +232,7 @@ describe("mergeFramesIntoSnapshot", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: { "0-10": "/html[1]" },
},
Expand Down Expand Up @@ -270,6 +277,7 @@ describe("mergeFramesIntoSnapshot", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: {},
},
Expand Down Expand Up @@ -308,6 +316,7 @@ describe("mergeFramesIntoSnapshot", () => {
{
tagNameMap: {},
scrollableMap: {},
focusableMap: {},
urlMap: {},
xpathMap: {},
},
Expand Down