81 changes: 80 additions & 1 deletion README.md
@@ -8,6 +8,7 @@ This is an n8n community node. It lets you use **[Firecrawl](https://firecrawl.d

[Installation](#installation)
[Operations](#operations)
[Tool Node Support](#tool-node-support)
[Credentials](#credentials)
[Compatibility](#compatibility)
[Resources](#resources)
@@ -76,7 +77,79 @@ The **Firecrawl** node supports the following operations:
- Get historical token usage for your team

### Team Queue Status
- Get your team's current queue load (waiting, active, max concurrency)

## Tool Node Support

The Firecrawl node now supports **Tool Node** functionality for n8n's AI Agent system! This lets AI Agents in your workflows call Firecrawl directly.

### Available Tools

When used as a Tool Node, Firecrawl exposes the following tools to AI Agents:

#### 1. **scrape_url**
Scrapes a URL and extracts its content in LLM-ready format (markdown, HTML, or structured data).

**Parameters:**
- `url` (required): The URL to scrape
- `formats`: Output formats (comma-separated): markdown, html, rawHtml, screenshot, summary
- `onlyMainContent`: Only return main content (default: true)
- `actions`: JSON array of actions to interact with dynamic content
- `headers`: JSON object of custom headers
- `waitFor`: Milliseconds to wait for the page to load before fetching content
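
As a sketch, a `scrape_url` call issued by an agent might carry a parameter object like the following (hypothetical values; the field names match the parameters above):

```ts
// Hypothetical scrape_url payload an AI Agent might produce.
const scrapeCall = {
  tool: 'scrape_url',
  parameters: {
    url: 'https://example.com/pricing',
    formats: 'markdown,summary', // comma-separated output formats
    onlyMainContent: true,       // drop headers, navs, footers
    waitFor: 2000,               // give dynamic content 2s to render
  },
};
```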

#### 2. **extract_data**
Extracts structured data from one or more URLs using AI. Define a schema to get specific data fields from pages.

**Parameters:**
- `urls` (required): Comma-separated list of URLs to extract from (supports glob patterns)
- `schema` (required): JSON schema defining the structure of data to extract
- `prompt`: Optional prompt to guide the extraction
- `enableWebSearch`: Enable web search to find additional data
- `ignoreSitemap`: Ignore the website sitemap
- `includeSubdomains`: Include subdomains
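
Since `schema` is passed as a string, a hypothetical product-data extraction might look like this (the schema string follows the same shape as the placeholder in `tools.ts`):

```ts
// Hypothetical extract_data payload: pull title and price from product pages.
const extractCall = {
  tool: 'extract_data',
  parameters: {
    urls: 'https://example.com/products/*',           // glob over product pages
    schema: '{"title": "string", "price": "number"}', // JSON schema as a string
    prompt: 'Extract each product title and its numeric price.',
  },
};
```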

#### 3. **search_website**
Search through a website and optionally scrape the results.

**Parameters:**
- `url` (required): The URL to search on
- `search` (required): Search query
- `scrape`: Whether to scrape search results
- `limit`: Maximum number of results

#### 4. **crawl_website**
Crawls an entire website and returns structured data from all pages.

**Parameters:**
- `url` (required): The URL to start crawling from
- `limit`: Maximum number of pages to crawl (default: 100)
- `excludePaths`: Comma-separated path patterns to exclude
- `includePaths`: Comma-separated path patterns to include
- `prompt`: Natural language prompt to guide the crawl
- `formats`: Output formats
- `allowExternalLinks`: Allow crawling external domains
- `allowSubdomains`: Allow crawling subdomains
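
For instance, a crawl that skips blog and admin pages could be parameterized like this (illustrative values only):

```ts
// Hypothetical crawl_website payload with path filtering.
const crawlCall = {
  tool: 'crawl_website',
  parameters: {
    url: 'https://example.com',
    limit: 50,                      // cap the crawl at 50 pages
    excludePaths: 'blog/*,admin/*', // comma-separated patterns to skip
    formats: 'markdown',
  },
};
```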

#### 5. **map_website**
Get all URLs from a website without scraping content.

**Parameters:**
- `url` (required): The URL to map
- `limit`: Maximum number of URLs (default: 1000)
- `excludePaths`: Path patterns to exclude
- `includePaths`: Path patterns to include

### Using Firecrawl with n8n AI Agents

To use Firecrawl as a Tool Node in your n8n AI Agent workflows:

1. Add a Firecrawl node to your workflow
2. Configure your Firecrawl credentials
3. The AI Agent will automatically discover the available Firecrawl tools
4. When the agent needs to scrape, extract, search, crawl, or map a site, it will call the appropriate Firecrawl tool

This makes it easier to integrate web data extraction into AI Agent workflows without manual configuration.
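
Under the hood, each tool name resolves to a Firecrawl operation through the `toolToOperationMap` export in `nodes/Firecrawl/tools.ts`. A minimal dispatch sketch (the `resolveOperation` helper and the import path are illustrative, not part of the node's public API):

```ts
import { toolToOperationMap } from './nodes/Firecrawl/tools';

// Hypothetical helper: translate an agent's tool call into a node operation.
function resolveOperation(toolName: string): string {
  const operation = toolToOperationMap[toolName];
  if (!operation) {
    throw new Error(`Unknown Firecrawl tool: ${toolName}`);
  }
  return operation; // e.g. 'scrape_url' -> 'scrape'
}
```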

## Credentials

@@ -103,6 +176,12 @@ To use the Firecrawl node, you need to:

## Version history

### 1.0.7
- Add Tool Node support for n8n AI Agent system
- Firecrawl can now be used as a Tool Node in AI Agent workflows
- Added tool definitions for scrape_url, extract_data, search_website, crawl_website, and map_website
- Improved discoverability and easier setup for AI Agent integrations

### 1.0.6
- Add support for additional Firecrawl endpoints:
- Batch Scrape (start/status/errors)
7 changes: 6 additions & 1 deletion nodes/Firecrawl/Firecrawl.node.ts
@@ -1,12 +1,15 @@
import type { INodeType, INodeTypeDescription } from 'n8n-workflow';
import { allMethods } from './methods';
import { allProperties } from './properties';
import { tools } from './tools';
import type { Tool } from './tools';

/**
* Firecrawl API Node implementation
* Supports both standard node and Tool Node formats for n8n AI Agent integration
*/
export class Firecrawl implements INodeType {
description: INodeTypeDescription & { tools?: Tool[] } = {
displayName: 'Firecrawl',
name: 'firecrawl',
icon: 'file:firecrawl.svg',
@@ -36,6 +39,8 @@ export class Firecrawl implements INodeType {
},
},
properties: allProperties,
// Add Tool Node support for n8n AI Agent system
tools: tools,
};

methods = allMethods;
233 changes: 233 additions & 0 deletions nodes/Firecrawl/tools.ts
@@ -0,0 +1,233 @@
/**
* Tool definitions for Firecrawl operations in n8n AI Agent system
* These tools enable Firecrawl to function as a Tool Node in n8n
*
* Note: The 'tools' property in INodeTypeDescription is a relatively new n8n feature.
* If the ITool type is not available, we define the structure manually.
*/
export interface ToolParameter {
name: string;
type: string;
required?: boolean;
description?: string;
placeholder?: string;
default?: string | boolean | number;
}

export interface Tool {
name: string;
description: string;
parameters: ToolParameter[];
}

export const tools: Tool[] = [
{
name: 'scrape_url',
description: 'Scrapes a URL and extracts its content in LLM-ready format (markdown, HTML, or structured data). Use this to get clean, formatted content from any webpage.',
parameters: [
{
name: 'url',
type: 'string',
required: true,
description: 'The URL to scrape',
placeholder: 'https://example.com',
},
{
name: 'formats',
type: 'string',
description: 'Output formats (comma-separated): markdown, html, rawHtml, screenshot, summary',
default: 'markdown',
},
{
name: 'onlyMainContent',
type: 'boolean',
description: 'Only return main content excluding headers, navs, footers',
default: true,
},
{
name: 'actions',
type: 'string',
description: 'JSON array of actions to interact with dynamic content (click, wait, scroll, etc.)',
},
{
name: 'headers',
type: 'string',
description: 'JSON object of custom headers to send with the request',
},
{
name: 'waitFor',
type: 'number',
description: 'Wait milliseconds for page to load before fetching content',
default: 0,
},
],
},
{
name: 'extract_data',
description: 'Extracts structured data from one or more URLs using AI. Define a schema to get specific data fields from pages.',
parameters: [
{
name: 'urls',
type: 'string',
required: true,
description: 'Comma-separated list of URLs to extract from (supports glob patterns)',
placeholder: 'https://example.com/*',
},
{
name: 'schema',
type: 'string',
required: true,
description: 'JSON schema defining the structure of data to extract',
placeholder: '{"title": "string", "price": "number"}',
},
{
name: 'prompt',
type: 'string',
description: 'Optional prompt to guide the extraction process',
},
{
name: 'enableWebSearch',
type: 'boolean',
description: 'Enable web search to find additional data',
default: false,
},
{
name: 'ignoreSitemap',
type: 'boolean',
description: 'Ignore the website sitemap when crawling',
default: true,
},
{
name: 'includeSubdomains',
type: 'boolean',
description: 'Include subdomains of the website',
default: false,
},
],
},
{
name: 'search_website',
description: 'Search through a website and optionally scrape the results. Useful for finding specific content across a domain.',
parameters: [
{
name: 'url',
type: 'string',
required: true,
description: 'The URL to search on',
placeholder: 'https://example.com',
},
{
name: 'search',
type: 'string',
required: true,
description: 'Search query to find content',
placeholder: 'keywords to search for',
},
{
name: 'scrape',
type: 'boolean',
description: 'Whether to scrape the search results',
default: false,
},
{
name: 'limit',
type: 'number',
description: 'Maximum number of search results to return',
default: 10,
},
],
},
{
name: 'crawl_website',
description: 'Crawls an entire website and returns structured data from all pages. Use this to get content from multiple pages on a domain.',
parameters: [
{
name: 'url',
type: 'string',
required: true,
description: 'The URL to start crawling from',
placeholder: 'https://example.com',
},
{
name: 'limit',
type: 'number',
description: 'Maximum number of pages to crawl',
default: 100,
},
{
name: 'excludePaths',
type: 'string',
description: 'Comma-separated path patterns to exclude (e.g., "blog/*,admin/*")',
},
{
name: 'includePaths',
type: 'string',
description: 'Comma-separated path patterns to include',
},
{
name: 'prompt',
type: 'string',
description: 'Natural language prompt to guide the crawl',
},
{
name: 'formats',
type: 'string',
description: 'Output formats (comma-separated): markdown, html, screenshot',
default: 'markdown',
},
{
name: 'allowExternalLinks',
type: 'boolean',
description: 'Allow crawling external domains',
default: false,
},
{
name: 'allowSubdomains',
type: 'boolean',
description: 'Allow crawling subdomains',
default: false,
},
],
},
{
name: 'map_website',
description: 'Get all URLs from a website without scraping content. Use this to discover all pages on a domain.',
parameters: [
{
name: 'url',
type: 'string',
required: true,
description: 'The URL to map',
placeholder: 'https://example.com',
},
{
name: 'limit',
type: 'number',
description: 'Maximum number of URLs to return',
default: 1000,
},
{
name: 'excludePaths',
type: 'string',
description: 'Comma-separated path patterns to exclude',
},
{
name: 'includePaths',
type: 'string',
description: 'Comma-separated path patterns to include',
},
],
},
];

/**
* Maps tool names to their corresponding Firecrawl operations
*/
export const toolToOperationMap: Record<string, string> = {
scrape_url: 'scrape',
extract_data: 'extract',
search_website: 'search',
crawl_website: 'crawl',
map_website: 'map',
};
