Skip to content

Commit

Permalink
fix: browserbase integration
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed Jan 3, 2025
1 parent bc7ae85 commit 752a885
Showing 1 changed file with 12 additions and 40 deletions.
52 changes: 12 additions & 40 deletions scrapegraphai/docloaders/browser_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,57 +11,30 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str],
This module provides an interface to the BrowserBase API.
The `browser_base_fetch` function takes the following arguments:
- `api_key`: The API key provided by BrowserBase.
- `project_id`: The ID of the project on BrowserBase where you want to fetch data from.
- `link`: The list of URLs or links that you want to fetch data from.
- `text_content`: A boolean flag to specify whether to return only the
text content (True) or the full HTML (False).
- `async_mode`: A boolean flag that determines whether the function runs asynchronously
(True) or synchronously (False, default).
It initializes a Browserbase object with the given API key and project ID,
then uses this object to load the specified link.
It returns the result of the loading operation.
Example usage:
```
from scrapegraphai.docloaders.browser_base import browser_base_fetch
result = browser_base_fetch(api_key="your_api_key",
project_id="your_project_id", link=["https://example.com"])
print(result)
```
Please note that you need to replace "your_api_key" and "your_project_id"
with your actual BrowserBase API key and project ID.
Args:
api_key (str): The API key provided by BrowserBase.
project_id (str): The ID of the project on BrowserBase where you want to fetch data from.
link (str): The URL or link that you want to fetch data from.
text_content (bool): Whether to return only the text content
(True) or the full HTML (False). Defaults to True.
async_mode (bool): Whether to run the function asynchronously
(True) or synchronously (False). Defaults to False.
link (List[str]): The URLs or links that you want to fetch data from.
text_content (bool): Whether to return only the text content (True) or the full HTML (False).
async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False).
Returns:
object: The result of the loading operation.
List[str]: The results of the loading operations.
"""

try:
from browserbase import Browserbase
except ImportError:
raise ImportError(f"""The browserbase module is not installed.
Please install it using `pip install browserbase`.""")
raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.")


browserbase = Browserbase(api_key=api_key, project_id=project_id)
# Initialize client with API key
browserbase = Browserbase(api_key=api_key)

# Create session with project ID
session = browserbase.sessions.create(project_id=project_id)

result = []
async def _async_fetch_link(l):
return await asyncio.to_thread(browserbase.load, l, text_content=text_content)
return await asyncio.to_thread(session.load, l, text_content=text_content)

if async_mode:
async def _async_browser_base_fetch():
Expand All @@ -72,7 +45,6 @@ async def _async_browser_base_fetch():
result = asyncio.run(_async_browser_base_fetch())
else:
for l in link:
result.append(browserbase.load(l, text_content=text_content))

result.append(session.load(l, text_content=text_content))

return result

0 comments on commit 752a885

Please sign in to comment.