Skip to content

Commit

Permalink
refactor: firecrawl scrape outputs
Browse files (browse the repository at this point in the history)
  • Loading branch information
JeanKaddour committed Feb 12, 2025
1 parent 4f6b46e commit cc419f4
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions backend/app/nodes/integrations/firecrawl/firecrawl_scrape.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,8 +15,8 @@ class Config:


class FirecrawlScrapeNodeOutput(BaseNodeOutput):
scrape_result: str = Field(
..., description="The scraped data in markdown or structured format."
markdown: str = Field(
..., description="The scraped data in markdown format."
)


Expand All @@ -26,7 +26,7 @@ class FirecrawlScrapeNodeConfig(BaseNodeConfig):
description="The URL to scrape and convert into clean markdown or structured data.",
)
output_schema: Dict[str, str] = Field(
default={"scrape_result": "string"},
default={"markdown": "string"},
description="The schema for the output of the node",
)
has_fixed_output: bool = True
Expand Down Expand Up @@ -59,17 +59,14 @@ async def run(self, input: BaseModel) -> BaseModel:
self.config.url_template, raw_input_dict, self.name
)

if not os.getenv("FIRECRAWL_API_KEY"):
raise ValueError("FIRECRAWL_API_KEY environment variable is not set")

app = FirecrawlApp() # type: ignore
scrape_result = app.scrape_url( # type: ignore
url_template,
params={
"formats": ["markdown", "html"],
"formats": ["markdown"],
},
)
return FirecrawlScrapeNodeOutput(scrape_result=json.dumps(scrape_result))
return FirecrawlScrapeNodeOutput(markdown=scrape_result["markdown"])
except Exception as e:
logging.error(f"Failed to scrape URL: {e}")
return FirecrawlScrapeNodeOutput(scrape_result="")
return FirecrawlScrapeNodeOutput(markdown="")

0 comments on commit cc419f4

Please sign in to comment.