Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions human-fallback-agent/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FIRECRAWL_API_KEY=your_firecrawl_key
HUMANPAGES_API_KEY=hp_your_key
36 changes: 36 additions & 0 deletions human-fallback-agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Human Fallback Agent

Scrape with Firecrawl, delegate failures to real humans via Human Pages.

When Firecrawl can't extract data from a page (login walls, CAPTCHAs, dynamic gated content), this agent finds a human who can do it manually and returns structured data.

## Setup

```bash
pip install firecrawl-py requests
```

```bash
cp .env.example .env
# Add your keys
```

## How it works

1. Try scraping with Firecrawl
2. If extraction fails or returns empty, search Human Pages for someone who can help
3. Create a job offer describing what data to extract
4. Poll until the human submits results
5. Continue with the extracted data

## Run

```bash
python agent.py
```

## Links

- [Firecrawl docs](https://docs.firecrawl.dev)
- [Human Pages docs](https://humanpages.ai/dev)
- [Human Pages MCP server](https://www.npmjs.com/package/humanpages)
108 changes: 108 additions & 0 deletions human-fallback-agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
Firecrawl + Human Pages: scrape with fallback to real humans.

Try Firecrawl first. If extraction fails, hire a human via Human Pages
to extract the data manually.
"""

import os
import time

import requests
from dotenv import load_dotenv
from firecrawl import FirecrawlApp

load_dotenv()  # pull FIRECRAWL_API_KEY / HUMANPAGES_API_KEY from .env into os.environ

# Fail fast (KeyError) at import time if either API key is missing.
firecrawl = FirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
HP_KEY = os.environ["HUMANPAGES_API_KEY"]
HP_BASE = "https://humanpages.ai"  # Human Pages API root
HP_HEADERS = {"Content-Type": "application/json", "X-Agent-Key": HP_KEY}

# Demo target and extraction instruction — edit these for your own use case.
TARGET_URL = "https://example.com/pricing"
EXTRACT_PROMPT = "Extract all pricing tiers with name, price, and features list"


def scrape_with_firecrawl(url: str, prompt: str) -> dict | None:
    """Attempt structured extraction from *url* with Firecrawl.

    Returns the extracted data, or None when extraction fails, comes back
    empty, or Firecrawl raises. Returning None (rather than propagating the
    error) is what lets the caller fall back to a human worker, per the
    documented flow ("If extraction fails or returns empty, search Human
    Pages").
    """
    try:
        result = firecrawl.scrape_url(
            url, params={"formats": ["extract"], "extract": {"prompt": prompt}}
        )
    except Exception as exc:
        # Any scrape failure (network, auth, blocked page) means "use the
        # human fallback" — crashing here would defeat the agent's purpose.
        print(f"Firecrawl error: {exc}")
        return None
    # Guard against a None/partial response lacking the "extract" key.
    extracted = (result or {}).get("extract")
    # Heuristic: very short extractions (empty dicts, "N/A", ...) count as
    # failures so the human fallback kicks in.
    if extracted and len(str(extracted)) > 20:
        return extracted
    return None


def search_humans(skill: str) -> list:
    """Return humans on Human Pages who match *skill* and are available.

    Raises requests.HTTPError on a non-2xx response.
    """
    query = {"skill": skill, "available": "true"}
    response = requests.get(
        f"{HP_BASE}/api/humans/search",
        params=query,
        headers=HP_HEADERS,
    )
    response.raise_for_status()
    payload = response.json()
    return payload["results"]


def create_job(human_id: str, url: str, prompt: str) -> str:
    """Offer a paid extraction job to *human_id*; return the new job's id.

    Raises requests.HTTPError on a non-2xx response.
    """
    payload = {
        "humanId": human_id,
        "title": f"Extract data from {url}",
        "description": f"Please visit {url} and extract the following:\n\n{prompt}\n\nReturn the data as JSON.",
        "priceUsdc": "5.00",
        "deadlineHours": 2,
    }
    response = requests.post(f"{HP_BASE}/api/jobs", headers=HP_HEADERS, json=payload)
    response.raise_for_status()
    body = response.json()
    return body["job"]["id"]


def wait_for_job(job_id: str, timeout: int = 3600) -> dict:
    """Poll the job every 30s until it reaches a terminal state.

    Returns the job dict once its status is SUBMITTED or COMPLETED.
    Raises RuntimeError when the job is REJECTED or CANCELLED, and
    TimeoutError after *timeout* seconds without a terminal state.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        response = requests.get(f"{HP_BASE}/api/jobs/{job_id}", headers=HP_HEADERS)
        response.raise_for_status()
        job = response.json()["job"]
        status = job["status"]
        if status in ("SUBMITTED", "COMPLETED"):
            return job
        if status in ("REJECTED", "CANCELLED"):
            raise RuntimeError(f"Job {job_id} was {job['status']}")
        print(f"  Job status: {job['status']}, waiting...")
        time.sleep(30)
    raise TimeoutError(f"Job {job_id} timed out")


def main():
    """Scrape TARGET_URL; when Firecrawl fails, hire a human to extract it.

    Returns the extracted data on a Firecrawl success, the finished job dict
    when a human completed it, or None when no human was available.
    """
    print(f"Scraping {TARGET_URL} with Firecrawl...")
    data = scrape_with_firecrawl(TARGET_URL, EXTRACT_PROMPT)

    if data:
        print("Firecrawl succeeded:")
        print(data)
        return data

    # Firecrawl came up empty — delegate the extraction to a person.
    print("Firecrawl couldn't extract the data. Searching for a human...")
    candidates = search_humans("data entry")
    if not candidates:
        print("No humans available.")
        return None

    worker = candidates[0]
    print(f"Found {worker.get('name', worker['id'])} — creating job...")
    job_id = create_job(worker["id"], TARGET_URL, EXTRACT_PROMPT)
    print(f"Job created: {job_id}. Waiting for human...")

    job = wait_for_job(job_id)
    print(f"Human finished! Status: {job['status']}")

    # Surface what the human sent back on the job's message thread.
    thread = requests.get(f"{HP_BASE}/api/jobs/{job_id}/messages", headers=HP_HEADERS)
    thread.raise_for_status()
    for message in thread.json()["messages"]:
        if message["role"] == "human":
            print(f"Human's response: {message['content']}")

    return job


if __name__ == "__main__":
    main()  # allows `python agent.py` while keeping the module importable
3 changes: 3 additions & 0 deletions human-fallback-agent/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
firecrawl-py>=1.0.0
requests>=2.31.0
python-dotenv>=1.0.0