Skip to content

Commit 8b80913

Browse files
authored
🐛 fix: correctly handle playwright parser (#184)
1 parent 5f1795b commit 8b80913

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/aggregators/aggregate_results.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,8 @@ def discover_tasks() -> Dict[str, List[str]]:
4949

5050
for task_dir in category_dir.iterdir():
5151
if task_dir.is_dir():
52-
# Prefix with original dir name for uniqueness
53-
if task_dir_name == "playwright_webarena":
54-
tasks.append(f"webarena__{category_dir.name}__{task_dir.name}")
55-
else:
56-
tasks.append(f"{category_dir.name}__{task_dir.name}")
52+
# Use unified naming for both playwright and webarena variants
53+
tasks.append(f"{category_dir.name}__{task_dir.name}")
5754

5855
all_tasks[mcp_service] = sorted(tasks)
5956

@@ -70,6 +67,9 @@ def collect_results(exp_dir: Path, k: int) -> Dict[str, Dict[str, Any]]:
7067
continue
7168

7269
model, service = model_service_dir.name.split("__", 1)
70+
# Normalize service name: treat playwright_webarena as playwright
71+
if service == "playwright_webarena":
72+
service = "playwright"
7373

7474
for run_idx in range(1, k + 1):
7575
run_dir = model_service_dir / f"run-{run_idx}"

0 commit comments

Comments
 (0)