Skip to content

Commit fecedb1

Browse files
committed
enable tracing
1 parent bd43467 commit fecedb1

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

browsergym/webarena_verified/src/browsergym/webarena_verified/evaluators.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,12 @@
33
from platform-labs-agent-eval-harness.
44
"""
55

6-
import asyncio
7-
import importlib
86
import json
97
import logging
10-
from datetime import datetime
8+
import tempfile
119
from pathlib import Path
12-
from typing import Any, Dict, Optional
1310

1411
import playwright
15-
from agent_eval_harness_common.models import AllocationResource, WebsiteRequirement
16-
from playwright.async_api import async_playwright
1712

1813
from browsergym.webarena.instance import WebArenaInstance
1914
from webarena_verified.api.evaluator_api import TaskEvaluator
@@ -75,14 +70,27 @@ def __call__(
7570
Returns:
7671
Float score compatible with BrowserGym (1.0 or 0.0)
7772
"""
73+
# import webarena dynamically
74+
from webarena.browser_env.actions import ActionTypes
75+
# if last action is not a STOP action, return 0.0 as the task is not completed yet
76+
if trajectory[-1].get("action_type") != ActionTypes.STOP:
77+
return 0.0
78+
79+
# task is done: load the config file, stop playwright tracing, and evaluate the trace
7880
with open(config_file, "r") as f:
79-
config = json.load(f)
81+
config_raw = json.load(f)
82+
config: WebArenaVerifiedTask = WebArenaVerifiedTask.model_validate(config_raw)
83+
84+
# stop playwright tracing
85+
with tempfile.TemporaryDirectory() as temp_dir:
86+
trace_path = Path(temp_dir) / f"wav_{config.task_id}.zip"
87+
page.context.tracing.stop(path=trace_path)
8088

8189
# create eval request
8290
eval_request = WebarenaTaskEvalRequest(
83-
task=WebArenaVerifiedTask.model_validate(config),
91+
task=config,
8492
agent_response_raw=trajectory[-1].get("answer"),
85-
network_trace=NetworkTrace.from_playwright_trace(...), # TODO: add path to playwright trace should be Path(exp_args.exp_dir / "pw_traces" / f"{exp_args.exp_name}.zip")
93+
network_trace=NetworkTrace.from_playwright_trace(trace_path), # TODO: the playwright trace path should be Path(exp_args.exp_dir / "pw_traces" / f"{exp_args.exp_name}.zip")
8694
)
8795

8896
# Run wa_verified evaluation and return float score

browsergym/webarena_verified/src/browsergym/webarena_verified/task.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
102102
for site in self.config["sites"]:
103103
self.webarena_instance.ui_login(site=site, page=page)
104104

105+
# enable playwright tracing (required for webarena_verified evaluation)
106+
page.context.tracing.start(snapshots=True)
107+
105108
# set geolocation if specified
106109
if self.config.get("geolocation"):
107110
page.context.set_geolocation(self.config["geolocation"])

0 commit comments

Comments
 (0)