fix: browserbase integration

ScrapeGraphAI · Jan 3, 2025 · 752a885
1 parent bc7ae85
commit 752a885
Showing 1 changed file with 12 additions and 40 deletions.
diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py
@@ -11,57 +11,30 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str],
 
     This module provides an interface to the BrowserBase API.
 
-    The `browser_base_fetch` function takes three arguments:
-    - `api_key`: The API key provided by BrowserBase.
-    - `project_id`: The ID of the project on BrowserBase where you want to fetch data from.
-    - `link`: The URL or link that you want to fetch data from.
-    - `text_content`: A boolean flag to specify whether to return only the 
-        text content (True) or the full HTML (False).
-    - `async_mode`: A boolean flag that determines whether the function runs asynchronously 
-        (True) or synchronously (False, default).
-
-    It initializes a Browserbase object with the given API key and project ID, 
-    then uses this object to load the specified link. 
-    It returns the result of the loading operation.
-
-    Example usage:
-
-    ```
-    from browser_base_fetch import browser_base_fetch
-
-    result = browser_base_fetch(api_key="your_api_key", 
-    project_id="your_project_id", link="https://example.com")
-    print(result)
-    ```
-
-    Please note that you need to replace "your_api_key" and "your_project_id" 
-    with your actual BrowserBase API key and project ID.
-
     Args:
         api_key (str): The API key provided by BrowserBase.
         project_id (str): The ID of the project on BrowserBase where you want to fetch data from.
-        link (str): The URL or link that you want to fetch data from.
-        text_content (bool): Whether to return only the text content 
-        (True) or the full HTML (False). Defaults to True.
-        async_mode (bool): Whether to run the function asynchronously 
-        (True) or synchronously (False). Defaults to False.
+        link (List[str]): The URLs or links that you want to fetch data from.
+        text_content (bool): Whether to return only the text content (True) or the full HTML (False).
+        async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False).
 
     Returns:
-        object: The result of the loading operation.
+        List[str]: The results of the loading operations.
     """
-
     try:
         from browserbase import Browserbase
     except ImportError:
-        raise ImportError(f"""The browserbase module is not installed. 
-                          Please install it using `pip install browserbase`.""")
+        raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.")
 
-
-    browserbase = Browserbase(api_key=api_key, project_id=project_id)
+    # Initialize client with API key
+    browserbase = Browserbase(api_key=api_key)
+
+    # Create session with project ID
+    session = browserbase.sessions.create(project_id=project_id)
 
     result = []
     async def _async_fetch_link(l):
-        return await asyncio.to_thread(browserbase.load, l, text_content=text_content)
+        return await asyncio.to_thread(session.load, l, text_content=text_content)
 
     if async_mode:
         async def _async_browser_base_fetch():
@@ -72,7 +45,6 @@ async def _async_browser_base_fetch():
         result = asyncio.run(_async_browser_base_fetch())
     else:
         for l in link:
-            result.append(browserbase.load(l, text_content=text_content))
-
+            result.append(session.load(l, text_content=text_content))
 
     return result