@@ -138,46 +138,19 @@ export class StagehandExtractHandler {
138
138
}
139
139
}
140
140
141
- private async extractPageText ( ) : Promise < { page_text ?: string } > {
142
- await this . stagehandPage . _waitForSettledDom ( ) ;
143
-
144
- const originalDOM = await this . stagehandPage . page . evaluate ( ( ) =>
145
- window . storeDOM ( undefined ) ,
146
- ) ;
147
-
148
- const { selectorMap } : { selectorMap : Record < number , string [ ] > } =
149
- await this . stagehand . page . evaluate ( ( ) =>
150
- window . processAllOfDom ( undefined ) ,
151
- ) ;
152
-
153
- await this . stagehand . page . evaluate ( ( ) =>
154
- window . createTextBoundingBoxes ( undefined ) ,
155
- ) ;
156
-
157
- const containerDims = await this . getTargetDimensions ( ) ;
158
-
159
- const allAnnotations = await this . collectAllAnnotations (
160
- selectorMap ,
161
- containerDims . width ,
162
- containerDims . height ,
163
- containerDims . offsetLeft ,
164
- containerDims . offsetTop ,
165
- ) ;
166
-
167
- const deduplicatedTextAnnotations =
168
- this . deduplicateAnnotations ( allAnnotations ) ;
169
-
170
- await this . stagehandPage . page . evaluate (
171
- ( dom ) => window . restoreDOM ( dom , undefined ) ,
172
- originalDOM ,
173
- ) ;
174
-
175
- const formattedText = formatText (
176
- deduplicatedTextAnnotations ,
177
- containerDims . width ,
178
- ) ;
141
+ private async extractPageText (
142
+ domSettleTimeoutMs ?: number ,
143
+ ) : Promise < { page_text ?: string } > {
144
+ await this . stagehandPage . _waitForSettledDom ( domSettleTimeoutMs ) ;
145
+ const tree = await getAccessibilityTree ( this . stagehandPage , this . logger ) ;
146
+ this . logger ( {
147
+ category : "extraction" ,
148
+ message : "Getting accessibility tree data" ,
149
+ level : 1 ,
150
+ } ) ;
151
+ const outputString = tree . simplified ;
179
152
180
- const result = { page_text : formattedText } ;
153
+ const result = { page_text : outputString } ;
181
154
return pageTextSchema . parse ( result ) ;
182
155
}
183
156
0 commit comments