Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ag2-web-research/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FIRECRAWL_API_KEY=fc-your_firecrawl_api_key
OPENAI_API_KEY=your_openai_api_key
OPENAI_BASE_URL=https://api.openai.com/v1 # override for Azure, SambaNova, etc.
LLM_MODEL=gpt-4o-mini
45 changes: 45 additions & 0 deletions ag2-web-research/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# AG2 Web Research Pipeline with Firecrawl

A multi-agent web research pipeline using [AG2](https://github.com/ag2ai/ag2) (formerly AutoGen)
and [Firecrawl](https://www.firecrawl.dev/).

Three specialist agents run in sequence via `GroupChat`:

```
searcher ──────> scraper ───────> reporter
    │               │                │
search_web()   scrape_pages()   deep_research()
 (Firecrawl)    (Firecrawl)      (Firecrawl)
```

## Features

- **AG2 GroupChat** — three specialist agents with `round_robin` ordering; they speak in a fixed searcher → scraper → reporter cycle until the reporter emits `TERMINATE` or `max_round` (12) is reached
- **Firecrawl tools** registered via `@register_for_llm` / `@register_for_execution` — tool description and execution are separate, independently replaceable
- **Firecrawl deep research** — premium analysis tool available to the reporter for additional depth

## Prerequisites

- [Firecrawl API key](https://www.firecrawl.dev/) (free tier works for search + scrape)
- OpenAI API key (or compatible endpoint)

## Quick Start

```bash
cd ag2-web-research
pip install -r requirements.txt
cp .env.example .env # add FIRECRAWL_API_KEY and OPENAI_API_KEY
python main.py "Latest developments in AI agent frameworks"
```

## How It Works

1. **searcher** calls `firecrawl.search()` to find relevant pages across multiple angles of the topic
2. **scraper** reads the searcher's findings, selects the top 3 URLs, and calls `firecrawl.scrape_url()` on each
3. **reporter** synthesises all content into a structured Markdown report; may call `firecrawl.deep_research()` for additional depth

## AG2 Concepts Demonstrated

- `GroupChat` with `speaker_selection_method="round_robin"` — deterministic agent ordering
- `@register_for_llm` / `@register_for_execution` decorator pattern — LLM tool description separated from execution
- `is_termination_msg` — reporter signals pipeline completion
146 changes: 146 additions & 0 deletions ag2-web-research/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
Multi-agent web research pipeline using AG2 (formerly AutoGen).

Three specialist agents collaborate in a fixed sequence via GroupChat:
searcher — finds relevant pages via Firecrawl search
scraper — extracts content from top results via Firecrawl scrape
reporter — synthesises findings into a structured Markdown report

Tools are registered using AG2's @register_for_llm / @register_for_execution
decorator pattern, separating tool description (for the LLM) from execution.
"""
import os
from dotenv import load_dotenv
from firecrawl import FirecrawlApp
from autogen import ConversableAgent, GroupChat, GroupChatManager, UserProxyAgent

load_dotenv()

# ── Firecrawl client ───────────────────────────────────────────────────────────

# One shared client used by every tool function below. A missing
# FIRECRAWL_API_KEY raises KeyError here, at import time.
_firecrawl_key = os.environ["FIRECRAWL_API_KEY"]
firecrawl = FirecrawlApp(api_key=_firecrawl_key)

# ── LLM config ────────────────────────────────────────────────────────────────

# Single endpoint entry; model name and base URL fall back to defaults when
# the corresponding environment variables are unset, the API key is required.
_llm_endpoint = {
    "model": os.getenv("LLM_MODEL", "gpt-4o-mini"),
    "api_key": os.environ["OPENAI_API_KEY"],
    "base_url": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
}
llm_config = {
    "config_list": [_llm_endpoint],
    "temperature": 0.3,
    "cache_seed": None,
}

# ── Tool functions ─────────────────────────────────────────────────────────────

def search_web(query: str, num_results: int = 5) -> str:
    """Search the web using Firecrawl and return top results with URLs and snippets."""
    # NOTE: the docstring above is passed verbatim as the tool description when
    # this function is registered, so implementation notes live in comments.
    #
    # Args:
    #   query: search phrase forwarded to `firecrawl.search`.
    #   num_results: maximum number of hits requested (sent as `limit`).
    # Returns:
    #   A "Found N results:" header followed by one numbered entry per hit
    #   (title, URL, description).

    def _field(obj, key, default=""):
        # Read `key` from either a plain dict or an attribute-style object and
        # treat a missing/None value as `default`.
        # NOTE(review): newer firecrawl-py releases appear to return model
        # objects rather than dicts — confirm against the pinned SDK version.
        value = obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
        return default if value is None else value

    results = firecrawl.search(query, limit=num_results)
    # Materialise once so the count and the listing always agree, and so a
    # null "data" payload is handled as "no results" instead of crashing.
    hits = list(_field(results, "data", []))
    formatted = "\n".join(
        f"{i}. {_field(r, 'title', 'No title')}\n URL: {_field(r, 'url')}\n {_field(r, 'description')}"
        for i, r in enumerate(hits, start=1)
    )
    return f"Found {len(hits)} results:\n\n{formatted}"


def scrape_pages(urls: list[str]) -> str:
    """Scrape and extract Markdown content from a list of URLs using Firecrawl (max 3)."""
    # NOTE: the docstring above is passed verbatim as the tool description when
    # this function is registered, so implementation notes live in comments.
    #
    # Args:
    #   urls: pages to scrape; only the first three are fetched.
    # Returns:
    #   One "**url**\n<content or error text>" section per attempted URL,
    #   separated by blank lines. Content is truncated to 2000 characters.

    # An LLM may hand over a single URL string; slicing that directly would
    # yield its first three characters rather than one URL.
    if isinstance(urls, str):
        urls = [urls]

    sections = []
    for url in (urls or [])[:3]:
        try:
            result = firecrawl.scrape_url(url, formats=["markdown"])
            # Accept both dict and attribute-style responses.
            # NOTE(review): newer firecrawl-py releases appear to return model
            # objects rather than dicts — confirm against the pinned SDK version.
            if isinstance(result, dict):
                markdown = result.get("markdown")
            else:
                markdown = getattr(result, "markdown", None)
            # A missing, None or empty body is reported explicitly instead of
            # producing a blank section or a misleading TypeError.
            body = (markdown or "")[:2000] or "No content"
        except Exception as exc:
            # Deliberately broad: one failing page must not abort the others;
            # the error text is returned so the calling agent can see it.
            body = str(exc)
        sections.append(f"**{url}**\n{body}")
    return "\n\n".join(sections)


def deep_research(topic: str) -> str:
    """Run Firecrawl deep research on a topic for comprehensive analysis."""
    # NOTE: the docstring above is passed verbatim as the tool description when
    # this function is registered, so implementation notes live in comments.
    #
    # Args:
    #   topic: subject forwarded to `firecrawl.deep_research`.
    # Returns:
    #   The response's data.finalAnalysis text, or a fixed fallback sentence
    #   when that field is missing, None or empty.

    def _field(obj, key):
        # Read `key` from a dict or an attribute-style object; None if absent.
        if obj is None:
            return None
        return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)

    result = firecrawl.deep_research(topic, max_depth=3, time_limit=60)
    # Walk the nested payload defensively: a null "data" or "finalAnalysis"
    # must not raise or leak None out of a function annotated `-> str`.
    analysis = _field(_field(result, "data"), "finalAnalysis")
    return analysis or "Deep research returned no analysis."


# ── Agents ─────────────────────────────────────────────────────────────────────

# System prompts are kept as named constants so the agent definitions below
# stay short; the text is what each agent's LLM receives as its instructions.
_SEARCHER_PROMPT = (
    "You are a web research specialist. Use the search_web tool to find the most "
    "relevant and recent pages on the research topic. Perform at least 3 searches "
    "covering different angles. Report the URLs and key findings."
)

_SCRAPER_PROMPT = (
    "You are a content extraction specialist. Review the searcher's findings, "
    "identify the top 3 most relevant URLs, and use the scrape_pages tool to extract "
    "their full content. Summarise the key information from each page."
)

_REPORTER_PROMPT = (
    "You are a research analyst and writer. Synthesise all findings from the searcher "
    "and scraper into a comprehensive Markdown report with:\n"
    "## Executive Summary\n"
    "## Key Findings\n"
    "## Detailed Analysis\n"
    "## Sources\n"
    "## Conclusion\n"
    "You may use the deep_research tool for additional depth on key points. "
    "End your final message with TERMINATE."
)


def _reporter_sees_terminate(message):
    """Return True when the message content contains the TERMINATE marker."""
    content = message.get("content") or ""  # content may be absent or None
    return "TERMINATE" in content


searcher = ConversableAgent(
    name="searcher",
    system_message=_SEARCHER_PROMPT,
    llm_config=llm_config,
)

scraper = ConversableAgent(
    name="scraper",
    system_message=_SCRAPER_PROMPT,
    llm_config=llm_config,
)

reporter = ConversableAgent(
    name="reporter",
    system_message=_REPORTER_PROMPT,
    llm_config=llm_config,
    is_termination_msg=_reporter_sees_terminate,
)

# ── Tool registration ──────────────────────────────────────────────────────────
# @register_for_llm — provides tool description to the agent's LLM
# @register_for_execution — provides the callable so the agent can run it

# Each agent both advertises (register_for_llm) and executes
# (register_for_execution) exactly one tool, registered under the function's
# own name with its docstring as the description.
for _agent, _tool in (
    (searcher, search_web),
    (scraper, scrape_pages),
    (reporter, deep_research),
):
    _agent.register_for_llm(name=_tool.__name__, description=_tool.__doc__)(_tool)
    _agent.register_for_execution(name=_tool.__name__)(_tool)

# ── GroupChat ──────────────────────────────────────────────────────────────────

def _manager_sees_terminate(message):
    """Return True when the message content contains the TERMINATE marker."""
    content = message.get("content") or ""  # content may be absent or None
    return "TERMINATE" in content


# Fixed speaking order searcher -> scraper -> reporter, capped at 12 rounds.
_groupchat_settings = {
    "agents": [searcher, scraper, reporter],
    "messages": [],
    "max_round": 12,
    "speaker_selection_method": "round_robin",
}
groupchat = GroupChat(**_groupchat_settings)

manager = GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config,
    is_termination_msg=_manager_sees_terminate,
)

# ── Entry point ───────────────────────────────────────────────────────────────

def run_research(topic: str) -> None:
    """Start the group chat with a request to research *topic*.

    A non-interactive user proxy (no human input, code execution disabled)
    sends the opening message to the group chat manager.
    """
    opening_message = f"Research this topic thoroughly: {topic}"
    user = UserProxyAgent(
        name="user",
        human_input_mode="NEVER",
        code_execution_config=False,
    )
    user.initiate_chat(manager, message=opening_message)


if __name__ == "__main__":
    import sys

    # Everything after the script name forms the topic; fall back to a
    # default when nothing (or only empty text) was supplied.
    topic = " ".join(sys.argv[1:])
    if not topic:
        topic = "Latest developments in AI agent frameworks 2025"

    print(f"\n=== AG2 Web Research Pipeline ===\nTopic: {topic}\n")
    run_research(topic)
3 changes: 3 additions & 0 deletions ag2-web-research/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ag2[openai]>=0.11.0
firecrawl-py>=1.5.0
python-dotenv>=1.0.0
26 changes: 26 additions & 0 deletions ag2-web-research/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Stub firecrawl and add project root to sys.path before test collection.
test_tools.py patches firecrawl.FirecrawlApp at module level, so the stub
must be in sys.modules before that file is imported.
"""
import sys
import os
from types import ModuleType
from unittest.mock import MagicMock

# Make the project root (the directory above this tests folder) importable so
# that 'import main' works.
_tests_dir = os.path.dirname(os.path.abspath(__file__))
parent = os.path.dirname(_tests_dir)
if parent not in sys.path:
    sys.path.insert(0, parent)


def _stub_module(name: str, **attrs) -> ModuleType:
    """Create an empty module called *name*, attach *attrs*, and register it.

    The module is stored in ``sys.modules`` under *name* (replacing any
    existing entry) and returned.
    """
    stub = ModuleType(name)
    vars(stub).update(attrs)
    sys.modules[name] = stub
    return stub


# Register a fake 'firecrawl' module exposing a mock FirecrawlApp so that
# patch("firecrawl.FirecrawlApp") can resolve the module.
_firecrawl_stub_attrs = {"FirecrawlApp": MagicMock()}
_stub_module("firecrawl", **_firecrawl_stub_attrs)
33 changes: 33 additions & 0 deletions ag2-web-research/tests/test_agent_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Verify GroupChat agent setup without initiating chat or LLM calls."""
import os
# Provide placeholder credentials (without overriding real ones) so that
# importing main does not fail on missing environment variables.
for _required_key in ("FIRECRAWL_API_KEY", "OPENAI_API_KEY"):
    os.environ.setdefault(_required_key, "test")

def test_agents_created():
    """Each module-level agent carries the name matching its variable."""
    from unittest.mock import patch

    with patch("firecrawl.FirecrawlApp"):
        import main

        for expected_name in ("searcher", "scraper", "reporter"):
            agent = getattr(main, expected_name)
            assert agent.name == expected_name

def test_termination_condition():
    """The reporter's termination check fires only when TERMINATE is present."""
    from unittest.mock import patch

    with patch("firecrawl.FirecrawlApp"):
        import main

        is_done = main.reporter._is_termination_msg
        assert is_done({"content": "Report. TERMINATE"}) is True
        assert is_done({"content": "Still working"}) is False

def test_tools_registered_on_searcher():
    """search_web appears in the searcher's function map."""
    from unittest.mock import patch

    with patch("firecrawl.FirecrawlApp"):
        import main

        # Fall back to an empty mapping if the attribute is absent.
        function_map = getattr(main.searcher, "_function_map", {})
        assert "search_web" in function_map

def test_groupchat_round_robin():
    """The group chat uses round-robin selection over the three agents in order."""
    from unittest.mock import patch

    with patch("firecrawl.FirecrawlApp"):
        import main

        chat = main.groupchat
        assert chat.speaker_selection_method == "round_robin"
        agent_names = [agent.name for agent in chat.agents]
        assert agent_names == ["searcher", "scraper", "reporter"]
56 changes: 56 additions & 0 deletions ag2-web-research/tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Unit tests for Firecrawl tool functions — no API calls."""
import os
import pytest
from unittest.mock import MagicMock, patch

# Provide placeholder credentials (without overriding real ones) so that
# importing main does not fail on missing environment variables.
for _required_key in ("FIRECRAWL_API_KEY", "OPENAI_API_KEY"):
    os.environ.setdefault(_required_key, "test-key")

# Import main while FirecrawlApp is patched, so the module-level client that
# main constructs at import time is a mock rather than a real API client.
with patch("firecrawl.FirecrawlApp") as mock_fc_cls:
    mock_fc = mock_fc_cls.return_value = MagicMock()
    import main as ag2_main


def test_search_web_returns_formatted_results():
    """Titles, URLs and the result count all appear in the formatted output."""
    hits = [
        {
            "title": f"Page {n}",
            "url": f"https://example.com/{n}",
            "description": f"Desc {n}",
        }
        for n in (1, 2)
    ]
    ag2_main.firecrawl.search.return_value = {"data": hits}

    output = ag2_main.search_web("test query", num_results=2)

    for expected in ("Page 1", "https://example.com/1", "Found 2 results"):
        assert expected in output


def test_search_web_handles_empty_results():
    """An empty result list is reported as zero results."""
    client = ag2_main.firecrawl
    client.search.return_value = {"data": []}
    assert "Found 0 results" in ag2_main.search_web("obscure topic")


def test_scrape_pages_limits_to_three():
    """Only the first three of five URLs are passed to scrape_url."""
    scrape = ag2_main.firecrawl.scrape_url
    # Start from a clean call history and a non-raising mock.
    scrape.reset_mock()
    scrape.return_value = {"markdown": "Content here"}
    scrape.side_effect = None

    ag2_main.scrape_pages([f"https://example.com/{i}" for i in range(5)])

    assert scrape.call_count == 3  # max 3


def test_scrape_pages_handles_errors_gracefully():
    """A failing scrape is reported in the output instead of raising."""
    scrape = ag2_main.firecrawl.scrape_url
    scrape.side_effect = Exception("Rate limited")
    try:
        result = ag2_main.scrape_pages(["https://example.com/fail"])
        assert "Rate limited" in result or "error" in result.lower()
    finally:
        # The mock is shared module-wide; always clear the injected failure,
        # even when the assertion fails, so later tests are not affected.
        scrape.side_effect = None


def test_deep_research_returns_analysis():
    """The finalAnalysis text from the response is returned to the caller."""
    payload = {"data": {"finalAnalysis": "This is the deep analysis."}}
    ag2_main.firecrawl.deep_research.return_value = payload

    analysis = ag2_main.deep_research("AI agents")

    assert "deep analysis" in analysis