22
33from __future__ import annotations
44
5+ from autogpt .llm .utils .token_counter import count_string_tokens
6+
57COMMAND_CATEGORY = "web_browse"
68COMMAND_CATEGORY_TITLE = "Web Browsing"
79
810import logging
911from pathlib import Path
1012from sys import platform
11- from typing import Optional , Type
13+ from typing import Optional
1214
1315from bs4 import BeautifulSoup
1416from selenium .common .exceptions import WebDriverException
1517from selenium .webdriver .chrome .options import Options as ChromeOptions
1618from selenium .webdriver .chrome .service import Service as ChromeDriverService
1719from selenium .webdriver .chrome .webdriver import WebDriver as ChromeDriver
1820from selenium .webdriver .common .by import By
21+ from selenium .webdriver .common .options import ArgOptions as BrowserOptions
1922from selenium .webdriver .edge .options import Options as EdgeOptions
2023from selenium .webdriver .edge .service import Service as EdgeDriverService
2124from selenium .webdriver .edge .webdriver import WebDriver as EdgeDriver
3841from autogpt .processing .html import extract_hyperlinks , format_hyperlinks
3942from autogpt .url_utils .validators import validate_url
4043
41- BrowserOptions = ChromeOptions | EdgeOptions | FirefoxOptions | SafariOptions
42-
4344FILE_DIR = Path (__file__ ).parent .parent
45+ TOKENS_TO_TRIGGER_SUMMARY = 50
46+ LINKS_TO_RETURN = 20
4447
4548
4649@command (
@@ -64,25 +67,30 @@ def browse_website(url: str, question: str, agent: Agent) -> str:
6467 question (str): The question asked by the user
6568
6669 Returns:
67- Tuple[ str, WebDriver] : The answer and links to the user and the webdriver
70+ str: The answer and links to return to the user
6871 """
72+ driver = None
6973 try :
7074 driver , text = scrape_text_with_selenium (url , agent )
75+ add_header (driver )
76+ if TOKENS_TO_TRIGGER_SUMMARY < count_string_tokens (text , agent .llm .name ):
77+ text = summarize_memorize_webpage (url , text , question , agent , driver )
78+
79+ links = scrape_links_with_selenium (driver , url )
80+
81+ # Limit links to LINKS_TO_RETURN
82+ if len (links ) > LINKS_TO_RETURN :
83+ links = links [:LINKS_TO_RETURN ]
84+
85+ return f"Answer gathered from website: { text } \n \n Links: { links } "
7186 except WebDriverException as e :
7287 # These errors are often quite long and include lots of context.
7388 # Just grab the first line.
7489 msg = e .msg .split ("\n " )[0 ]
7590 return f"Error: { msg } "
76-
77- add_header (driver )
78- summary = summarize_memorize_webpage (url , text , question , agent , driver )
79- links = scrape_links_with_selenium (driver , url )
80-
81- # Limit links to 5
82- if len (links ) > 5 :
83- links = links [:5 ]
84- close_browser (driver )
85- return f"Answer gathered from website: { summary } \n \n Links: { links } "
91+ finally :
92+ if driver :
93+ close_browser (driver )
8694
8795
8896def scrape_text_with_selenium (url : str , agent : Agent ) -> tuple [WebDriver , str ]:
@@ -96,7 +104,7 @@ def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
96104 """
97105 logging .getLogger ("selenium" ).setLevel (logging .CRITICAL )
98106
99- options_available : dict [str , Type [ BrowserOptions ] ] = {
107+ options_available : dict [str , BrowserOptions ] = {
100108 "chrome" : ChromeOptions ,
101109 "edge" : EdgeOptions ,
102110 "firefox" : FirefoxOptions ,
0 commit comments