Skip to content

Dev branch for the ToolUseAgent #239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9ee2367
moving the browsergym.experiment.benchmark module to agentlab
TLSDC Apr 23, 2025
c2e2b9c
added comment for new parameter
TLSDC Apr 23, 2025
596fcd2
BaseMessages take into account 'input_text' key too (for xray)
TLSDC Apr 23, 2025
f9d7b91
convenient array to base64 function
TLSDC Apr 23, 2025
73ba428
tool agent embryo
TLSDC Apr 23, 2025
c11db49
Merge branch 'main' of github.com:ServiceNow/AgentLab into tlsdc/tool…
TLSDC Apr 24, 2025
6604dbc
added the MessageBuilder class, which should help interfacing APIs
TLSDC Apr 24, 2025
ef6f648
claude
TLSDC Apr 30, 2025
4e973ac
adding markdown display for MessageBuilder in xray
TLSDC May 1, 2025
54ec412
changed LLM structure to be more versatile
TLSDC May 1, 2025
0fc43cc
unified claude and openai response apis
TLSDC May 2, 2025
19cdaf9
i dont think this is relevant anymore
TLSDC May 2, 2025
5b3f469
backtracking from moving bgym.benchmarks etc
TLSDC May 2, 2025
087ad75
defaulting to claude bc it's better
TLSDC May 2, 2025
8a17470
kind of forced to comment this to avoid circular imports atm
TLSDC May 2, 2025
5f675ba
Merge branch 'main' of github.com:ServiceNow/AgentLab into tlsdc/tool…
TLSDC May 2, 2025
234be09
parametrized env output to agent_args
TLSDC May 2, 2025
544908e
fixing broken import in test
TLSDC May 2, 2025
16cc3cd
Add pricing tracking for Anthropic model and refactor pricing functions
recursix May 8, 2025
c674094
Enhance ToolUseAgent with token counting and improved message handlin…
recursix May 9, 2025
528b513
Update action in ClaudeResponseModel to None for improved clarity
recursix May 9, 2025
417893c
typo
recursix May 13, 2025
c676eab
typo
recursix May 13, 2025
ab2d331
Remove unnecessary import of anthropic for cleaner code
recursix May 13, 2025
bf57591
moving some utils to agent_utils.py
amanjaiswal73892 May 14, 2025
ce72b41
Fix: Formatting and Darglint
amanjaiswal73892 May 15, 2025
fe05d75
Refactor: Simplify message builder methods and add support for chat c…
amanjaiswal73892 May 15, 2025
97a39cc
added vllm-support-for-tool-use-agent
amanjaiswal73892 May 17, 2025
7d8a08c
Moving some functions to llm utils.py
amanjaiswal73892 May 21, 2025
ffd5c5e
Merge pull request #248 from ServiceNow/aj/tool_use_agent_chat_comple…
amanjaiswal73892 May 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/agentlab/agents/agent_args.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import bgym
from bgym import AbstractAgentArgs
from bgym import AbstractAgentArgs, Benchmark


class AgentArgs(AbstractAgentArgs):
Expand All @@ -14,7 +14,7 @@ class MyAgentArgs(AgentArgs):
Note: for working properly with AgentXRay, the arguments need to be serializable and hashable.
"""

def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode: bool):
def set_benchmark(self, benchmark: Benchmark, demo_mode: bool):
"""Optional method to set benchmark specific flags.

This allows the agent to have minor adjustments based on the benchmark.
Expand Down
44 changes: 44 additions & 0 deletions src/agentlab/agents/agent_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from PIL import Image, ImageDraw
from logging import warning


"""
This module contains utility functions for handling observations and actions in the context of agent interactions.
"""


def tag_screenshot_with_action(screenshot: Image.Image, action: str) -> Image.Image:
    """Mark the target of a coordinate action on a screenshot.

    Only ``mouse_click`` actions are rendered, e.g. ``mouse_click(120, 130)`` or
    ``mouse_click(x=120, y=130)`` draws a small blue dot centred on (120, 130).
    Any other action leaves the screenshot untouched.

    The dot is drawn in place on ``screenshot`` and that same object is returned.
    This is a best-effort helper: a ``ValueError`` or ``IndexError`` hit while
    parsing or drawing is reported through a logging warning instead of being
    propagated, and the screenshot is returned as it was.

    Args:
        screenshot: The screenshot to tag (modified in place).
        action: The action string to render on the screenshot.

    Returns:
        The screenshot that was passed in, tagged when the action could be parsed.
    """
    # Guard clause: nothing to render for non-coordinate actions.
    if not action.startswith("mouse_click"):
        return screenshot

    try:
        # Text between the first "(" and the first ")"; str.index raises
        # ValueError when either parenthesis is missing.
        raw_args = action[action.index("(") + 1 : action.index(")")]
        coords = [c.strip() for c in raw_args.split(",")]
        # Expect x and y, plus an optional third argument that is ignored here.
        if len(coords) not in (2, 3):
            raise ValueError(f"Invalid coordinate format: {coords}")
        # Accept the keyword spelling "x=..." / "y=..." for the first two arguments.
        if coords[0].startswith("x="):
            coords[0] = coords[0][2:]
        if coords[1].startswith("y="):
            coords[1] = coords[1][2:]
        # float() tolerates surrounding whitespace, so no extra strip is needed.
        x, y = float(coords[0]), float(coords[1])
        radius = 5
        ImageDraw.Draw(screenshot).ellipse(
            (x - radius, y - radius, x + radius, y + radius), fill="blue", outline="blue"
        )
    except (ValueError, IndexError) as e:
        warning(f"Failed to parse action '{action}': {e}")
    return screenshot
10 changes: 3 additions & 7 deletions src/agentlab/agents/dynamic_prompting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@
from warnings import warn

import bgym
from bgym import HighLevelActionSetArgs
from browsergym.core.action.base import AbstractActionSet
from browsergym.utils.obs import (
flatten_axtree_to_str,
flatten_dom_to_str,
overlay_som,
prune_html,
)
from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, overlay_som, prune_html

from agentlab.llm.llm_utils import (
BaseMessage,
Expand Down Expand Up @@ -99,7 +95,7 @@ class ObsFlags(Flags):

@dataclass
class ActionFlags(Flags):
action_set: bgym.HighLevelActionSetArgs = None # should be set by the set_benchmark method
action_set: HighLevelActionSetArgs = None # should be set by the set_benchmark method
long_description: bool = True
individual_examples: bool = False

Expand Down
13 changes: 7 additions & 6 deletions src/agentlab/agents/generic_agent/agent_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import bgym
from bgym import HighLevelActionSetArgs

from agentlab.agents import dynamic_prompting as dp
from agentlab.experiments import args
Expand Down Expand Up @@ -32,7 +33,7 @@
filter_visible_elements_only=False,
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=["bid"],
multiaction=False,
),
Expand Down Expand Up @@ -80,7 +81,7 @@
filter_visible_elements_only=False,
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=["bid"],
multiaction=False,
),
Expand Down Expand Up @@ -127,7 +128,7 @@
filter_visible_elements_only=False,
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=["bid"],
multiaction=False,
),
Expand Down Expand Up @@ -177,7 +178,7 @@
filter_visible_elements_only=False,
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=["bid"],
multiaction=True,
),
Expand Down Expand Up @@ -232,7 +233,7 @@
filter_visible_elements_only=False,
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=["bid"],
multiaction=False,
),
Expand Down Expand Up @@ -323,7 +324,7 @@
filter_visible_elements_only=args.Choice([True, False], p=[0.3, 0.7]),
),
action=dp.ActionFlags(
action_set=bgym.HighLevelActionSetArgs(
action_set=HighLevelActionSetArgs(
subsets=args.Choice([["bid"], ["bid", "coord"]]),
multiaction=args.Choice([True, False], p=[0.7, 0.3]),
),
Expand Down
5 changes: 3 additions & 2 deletions src/agentlab/agents/generic_agent/generic_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@

from copy import deepcopy
from dataclasses import asdict, dataclass
from functools import partial
from warnings import warn

import bgym
from bgym import Benchmark
from browsergym.experiments.agent import Agent, AgentInfo

from agentlab.agents import dynamic_prompting as dp
Expand All @@ -22,7 +24,6 @@
from agentlab.llm.tracking import cost_tracker_decorator

from .generic_agent_prompt import GenericPromptFlags, MainPrompt
from functools import partial


@dataclass
Expand All @@ -37,7 +38,7 @@ def __post_init__(self):
except AttributeError:
pass

def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode):
def set_benchmark(self, benchmark: Benchmark, demo_mode):
"""Override Some flags based on the benchmark."""
if benchmark.name.startswith("miniwob"):
self.flags.obs.use_html = True
Expand Down
3 changes: 2 additions & 1 deletion src/agentlab/agents/generic_agent/reproducibility_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pathlib import Path

import bgym
from bgym import HighLevelActionSetArgs
from browsergym.experiments.agent import AgentInfo
from bs4 import BeautifulSoup

Expand Down Expand Up @@ -144,7 +145,7 @@ def _make_backward_compatible(agent_args: GenericAgentArgs):
if isinstance(action_set, str):
action_set = action_set.split("+")

agent_args.flags.action.action_set = bgym.HighLevelActionSetArgs(
agent_args.flags.action.action_set = HighLevelActionSetArgs(
subsets=action_set,
multiaction=agent_args.flags.action.multi_actions,
)
Expand Down
Empty file.
Loading
Loading