Skip to content

Commit 7106a65

Browse files
authored
Update stale observe evals (#1615)
# why

Stale evals

# what changed

- `observe_yc_startup.ts`: website changed CSS selectors
- `observe_simple_google_search.ts`: switched to pressing Enter instead of clicking the potentially opaque Google search button

# test plan

<!-- This is an auto-generated description by cubic. -->

---

## Summary by cubic

Updated stale observe evals to match site changes and reduce flakiness. YC companies selectors fixed, and Google search now uses Enter with a short wait.

- **Bug Fixes**
  - YC companies: wait for network idle and update container selectors to `_rightCol_zhfs4_592` and `_section_zhfs4_163._results_zhfs4_343`.
  - Google search: switch observation to “Press enter” and add a 3s delay before asserting the expected URL.

<sup>Written for commit 8126020. Summary will update on new commits. <a href="https://cubic.dev/pr/browserbase/stagehand/pull/1615">Review in cubic</a></sup>

<!-- End of auto-generated description by cubic. -->
1 parent 8a3c066 commit 7106a65

2 files changed

Lines changed: 7 additions & 6 deletions

File tree

packages/evals/tasks/observe_simple_google_search.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@ export const observe_simple_google_search: EvalFunction = async ({
1919
const action1 = observation1[0];
2020
await v3.act(action1);
2121
}
22-
const observation2 = await v3.observe(
23-
"Click the search button in the suggestions dropdown",
24-
);
22+
const observation2 = await v3.observe("Press enter");
2523

2624
if (observation2.length > 0) {
2725
const action2 = observation2[0];
2826
await v3.act(action2);
2927
}
28+
await new Promise((resolve) => setTimeout(resolve, 3000));
3029

3130
const expectedUrl =
3231
"https://browserbase.github.io/stagehand-eval-sites/sites/google/openai.html";

packages/evals/tasks/observe_yc_startup.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ export const observe_yc_startup: EvalFunction = async ({
88
}) => {
99
try {
1010
const page = v3.context.pages()[0];
11-
await page.goto("https://www.ycombinator.com/companies");
11+
await page.goto("https://www.ycombinator.com/companies", {
12+
waitUntil: "networkidle",
13+
});
1214

1315
const observations = await v3.observe(
1416
"Click the container element that holds links to each of the startup companies. The companies each have a name, a description, and a link to their website.",
@@ -25,8 +27,8 @@ export const observe_yc_startup: EvalFunction = async ({
2527
}
2628

2729
const possibleLocators = [
28-
`div._rightCol_i9oky_592`,
29-
`div._section_i9oky_163._results_i9oky_343`,
30+
`div._rightCol_zhfs4_592`,
31+
`div._section_zhfs4_163._results_zhfs4_343`,
3032
];
3133

3234
// Precompute candidate backendNodeIds

0 commit comments

Comments
 (0)