Skip to content

Commit 393c8e0

Browse files
return hybrid tree for extract() with no arguments (#731)
* return hybrid tree
* Update .changeset/blue-beds-thank.md

Co-authored-by: Anirudh Kamath <[email protected]>

---------

Co-authored-by: Anirudh Kamath <[email protected]>
1 parent c7424ec commit 393c8e0

File tree

2 files changed

+17 -39 lines changed

2 files changed

+17 -39 lines changed

.changeset/blue-beds-thank.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand-lib": minor
3+
---
4+
5+
Make `extract()` with no arguments return the hybrid tree instead of the text-rendered webpage.

lib/handlers/extractHandler.ts

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -138,46 +138,19 @@ export class StagehandExtractHandler {
138138
}
139139
}
140140

141-
private async extractPageText(): Promise<{ page_text?: string }> {
142-
await this.stagehandPage._waitForSettledDom();
143-
144-
const originalDOM = await this.stagehandPage.page.evaluate(() =>
145-
window.storeDOM(undefined),
146-
);
147-
148-
const { selectorMap }: { selectorMap: Record<number, string[]> } =
149-
await this.stagehand.page.evaluate(() =>
150-
window.processAllOfDom(undefined),
151-
);
152-
153-
await this.stagehand.page.evaluate(() =>
154-
window.createTextBoundingBoxes(undefined),
155-
);
156-
157-
const containerDims = await this.getTargetDimensions();
158-
159-
const allAnnotations = await this.collectAllAnnotations(
160-
selectorMap,
161-
containerDims.width,
162-
containerDims.height,
163-
containerDims.offsetLeft,
164-
containerDims.offsetTop,
165-
);
166-
167-
const deduplicatedTextAnnotations =
168-
this.deduplicateAnnotations(allAnnotations);
169-
170-
await this.stagehandPage.page.evaluate(
171-
(dom) => window.restoreDOM(dom, undefined),
172-
originalDOM,
173-
);
174-
175-
const formattedText = formatText(
176-
deduplicatedTextAnnotations,
177-
containerDims.width,
178-
);
141+
private async extractPageText(
142+
domSettleTimeoutMs?: number,
143+
): Promise<{ page_text?: string }> {
144+
await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs);
145+
const tree = await getAccessibilityTree(this.stagehandPage, this.logger);
146+
this.logger({
147+
category: "extraction",
148+
message: "Getting accessibility tree data",
149+
level: 1,
150+
});
151+
const outputString = tree.simplified;
179152

180-
const result = { page_text: formattedText };
153+
const result = { page_text: outputString };
181154
return pageTextSchema.parse(result);
182155
}
183156

0 commit comments

Comments
 (0)