
Commit b2890d5

fix: broken sdk
1 parent b0e3276 commit b2890d5


15 files changed: +213 -37 lines changed


scrapegraph-py/examples/async/async_crawl_example.py

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ async def main():
     crawl_response = await client.crawl(
         url=url,
         prompt=prompt,
-        schema=schema,
+        data_schema=schema,
         cache_website=True,
         depth=2,
         max_pages=2,

scrapegraph-py/examples/miscellaneous/crawl_example.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def main():
     crawl_response = client.crawl(
         url=url,
         prompt=prompt,
-        schema=schema,
+        data_schema=schema,
         cache_website=True,
         depth=2,
         max_pages=2,

scrapegraph-py/examples/sync/crawl_example.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ def main():
     crawl_response = client.crawl(
         url=url,
         prompt=prompt,
-        schema=schema,
+        data_schema=schema,
         cache_website=True,
         depth=2,
         max_pages=2,
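
Taken together, the example updates amount to renaming the crawl keyword from schema to data_schema. A minimal sketch of how a caller looks after this commit; the URL, prompt, and schema contents below are illustrative placeholders, not values from the diff:

from scrapegraph_py import Client

# Assumes SGAI_API_KEY is set in the environment (see the from_env() change below)
client = Client.from_env()

# Placeholder schema; the shipped examples build theirs from Pydantic models
schema = {
    "type": "object",
    "properties": {
        "company_name": {"type": "string"},
        "description": {"type": "string"},
    },
}

crawl_response = client.crawl(
    url="https://example.com",
    prompt="What does the company do?",
    data_schema=schema,  # renamed from schema= in this commit
    cache_website=True,
    depth=2,
    max_pages=2,
)
print(crawl_response)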

scrapegraph-py/examples/sync/smartscraper_infinite_scroll_example.py

Lines changed: 4 additions & 2 deletions
@@ -1,3 +1,4 @@
+import os
 from scrapegraph_py import Client
 from scrapegraph_py.logger import sgai_logger
 from pydantic import BaseModel
@@ -14,8 +15,9 @@ class Company(BaseModel):
 class CompaniesResponse(BaseModel):
     companies: List[Company]
 
-# Initialize the client with explicit API key
-sgai_client = Client(api_key="sgai-api-key")
+# Initialize the client with API key from environment variable
+# Make sure to set SGAI_API_KEY in your environment or .env file
+sgai_client = Client.from_env()
 
 try:
     # SmartScraper request with infinite scroll
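
For reference, a short sketch of the environment-based initialization this example switches to; the key value is a placeholder:

import os
from scrapegraph_py import Client

# The client reads the API key from the SGAI_API_KEY environment variable (or a .env file)
os.environ.setdefault("SGAI_API_KEY", "sgai-your-api-key")  # placeholder value for illustration

sgai_client = Client.from_env()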

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 3 additions & 3 deletions
@@ -306,7 +306,7 @@ async def crawl(
         self,
         url: str,
         prompt: str,
-        schema: Dict[str, Any],
+        data_schema: Dict[str, Any],
         cache_website: bool = True,
         depth: int = 2,
         max_pages: int = 2,
@@ -317,7 +317,7 @@ async def crawl(
         logger.info("🔍 Starting crawl request")
         logger.debug(f"🌐 URL: {url}")
         logger.debug(f"📝 Prompt: {prompt}")
-        logger.debug(f"📊 Schema provided: {bool(schema)}")
+        logger.debug(f"📊 Schema provided: {bool(data_schema)}")
         logger.debug(f"💾 Cache website: {cache_website}")
         logger.debug(f"🔍 Depth: {depth}")
         logger.debug(f"📄 Max pages: {max_pages}")
@@ -327,7 +327,7 @@ async def crawl(
         request = CrawlRequest(
             url=url,
             prompt=prompt,
-            schema=schema,
+            data_schema=data_schema,
            cache_website=cache_website,
             depth=depth,
             max_pages=max_pages,

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 3 additions & 3 deletions
@@ -309,7 +309,7 @@ def crawl(
         self,
         url: str,
         prompt: str,
-        schema: Dict[str, Any],
+        data_schema: Dict[str, Any],
         cache_website: bool = True,
         depth: int = 2,
         max_pages: int = 2,
@@ -320,7 +320,7 @@ def crawl(
         logger.info("🔍 Starting crawl request")
         logger.debug(f"🌐 URL: {url}")
         logger.debug(f"📝 Prompt: {prompt}")
-        logger.debug(f"📊 Schema provided: {bool(schema)}")
+        logger.debug(f"📊 Schema provided: {bool(data_schema)}")
         logger.debug(f"💾 Cache website: {cache_website}")
         logger.debug(f"🔍 Depth: {depth}")
         logger.debug(f"📄 Max pages: {max_pages}")
@@ -330,7 +330,7 @@ def crawl(
         request = CrawlRequest(
             url=url,
             prompt=prompt,
-            schema=schema,
+            data_schema=data_schema,
             cache_website=cache_website,
             depth=depth,
             max_pages=max_pages,
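
The same rename applies on the async path. A hedged sketch of an async call; the class name AsyncClient and its from_env() constructor are assumed to mirror the sync Client, and the URL, prompt, and schema values are placeholders:

import asyncio

from scrapegraph_py import AsyncClient  # assumed export name, mirroring Client

async def main():
    client = AsyncClient.from_env()  # assumed to exist, as on the sync client
    response = await client.crawl(
        url="https://example.com",  # placeholder
        prompt="Extract the company description",  # placeholder
        data_schema={"type": "object", "properties": {"description": {"type": "string"}}},
    )
    print(response)

asyncio.run(main())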

scrapegraph-py/scrapegraph_py/models/crawl.py

Lines changed: 6 additions & 6 deletions
@@ -17,7 +17,7 @@ class CrawlRequest(BaseModel):
         example="What does the company do? and I need text content from there privacy and terms",
         description="The prompt to guide the crawl and extraction"
     )
-    schema: Dict[str, Any] = Field(
+    data_schema: Dict[str, Any] = Field(
         ...,
         description="JSON schema defining the structure of the extracted data"
     )
@@ -62,11 +62,11 @@ def validate_prompt(self) -> "CrawlRequest":
         return self
 
     @model_validator(mode="after")
-    def validate_schema(self) -> "CrawlRequest":
-        if not isinstance(self.schema, dict):
-            raise ValueError("Schema must be a dictionary")
-        if not self.schema:
-            raise ValueError("Schema cannot be empty")
+    def validate_data_schema(self) -> "CrawlRequest":
+        if not isinstance(self.data_schema, dict):
+            raise ValueError("Data schema must be a dictionary")
+        if not self.data_schema:
+            raise ValueError("Data schema cannot be empty")
         return self
 
 
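At the model level, the rename also renames the validator. A small sketch of constructing CrawlRequest directly, assuming only what the diff shows; the URL, prompt, and schema values are placeholders:

from pydantic import ValidationError

from scrapegraph_py.models.crawl import CrawlRequest

# A valid request now uses data_schema instead of schema
request = CrawlRequest(
    url="https://example.com",
    prompt="What does the company do?",
    data_schema={"type": "object", "properties": {"name": {"type": "string"}}},
)

# An empty dict is rejected by the renamed validator
try:
    CrawlRequest(
        url="https://example.com",
        prompt="What does the company do?",
        data_schema={},
    )
except ValidationError as exc:
    print(exc)  # the error message includes "Data schema cannot be empty"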
scrapegraph-py/scrapegraph_py/models/feedback.py

Lines changed: 5 additions & 0 deletions
@@ -20,3 +20,8 @@ def validate_request_id(self) -> "FeedbackRequest":
         except ValueError:
             raise ValueError("request_id must be a valid UUID")
         return self
+
+    def model_dump(self, *args, **kwargs) -> dict:
+        # Set exclude_none=True to exclude None values from serialization
+        kwargs.setdefault('exclude_none', True)
+        return super().model_dump(*args, **kwargs)

scrapegraph-py/scrapegraph_py/models/markdownify.py

Lines changed: 5 additions & 0 deletions
@@ -28,6 +28,11 @@ def validate_url(self) -> "MarkdownifyRequest":
             raise ValueError("Invalid URL")
         return self
 
+    def model_dump(self, *args, **kwargs) -> dict:
+        # Set exclude_none=True to exclude None values from serialization
+        kwargs.setdefault('exclude_none', True)
+        return super().model_dump(*args, **kwargs)
+
 
 class GetMarkdownifyRequest(BaseModel):
     """Request model for get_markdownify endpoint"""

scrapegraph-py/scrapegraph_py/models/searchscraper.py

Lines changed: 2 additions & 0 deletions
@@ -34,6 +34,8 @@ def validate_user_prompt(self) -> "SearchScraperRequest":
         return self
 
     def model_dump(self, *args, **kwargs) -> dict:
+        # Set exclude_none=True to exclude None values from serialization
+        kwargs.setdefault('exclude_none', True)
         data = super().model_dump(*args, **kwargs)
         # Convert the Pydantic model schema to dict if present
         if self.output_schema is not None:
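
The three model_dump overrides share one pattern: default exclude_none=True so unset optional fields are dropped from the serialized payload, while an explicit keyword from the caller still wins. A hypothetical model illustrating the effect (the field names here are illustrative, not the SDK's):

from typing import Optional

from pydantic import BaseModel


class ExampleRequest(BaseModel):
    request_id: str
    feedback_text: Optional[str] = None

    def model_dump(self, *args, **kwargs) -> dict:
        # Same pattern as the commit: drop None values unless the caller overrides it
        kwargs.setdefault("exclude_none", True)
        return super().model_dump(*args, **kwargs)


req = ExampleRequest(request_id="123e4567-e89b-12d3-a456-426614174000")
print(req.model_dump())
# {'request_id': '123e4567-e89b-12d3-a456-426614174000'}  (feedback_text omitted)
print(req.model_dump(exclude_none=False))
# passing exclude_none explicitly overrides the default set by setdefault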
