Skip to content

Commit 104a168

Browse files
committed
Adapt to Crawlee v1 (p2)
1 parent 942290e commit 104a168

25 files changed

+112
-110
lines changed

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ keywords = [
3636
dependencies = [
3737
"apify-client>=1.9.2",
3838
"apify-shared>=1.3.0",
39+
"cachetools>=5.5.0",
3940
"crawlee@git+https://github.com/apify/crawlee-python.git@new-storage-clients",
4041
"cryptography>=42.0.0",
4142
"httpx>=0.27.0",
@@ -72,7 +73,8 @@ dev = [
7273
"pytest~=8.4.0",
7374
"respx~=0.22.0",
7475
"ruff~=0.11.0",
75-
"setuptools", # setuptools are used by pytest but not explicitly required
76+
"setuptools", # setuptools are used by pytest but not explicitly required
77+
"types-cachetools>=6.0.0.20250525",
7678
]
7779

7880
[tool.hatch.build.targets.wheel]

src/apify/_actor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@
3333
from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
3434
from apify._proxy_configuration import ProxyConfiguration
3535
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
36-
from apify.apify_storage_client import ApifyStorageClient
3736
from apify.log import _configure_logging, logger
37+
from apify.storage_clients import ApifyStorageClient
3838
from apify.storages import Dataset, KeyValueStore, RequestQueue
3939

4040
if TYPE_CHECKING:

src/apify/_proxy_configuration.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121

2222
if TYPE_CHECKING:
2323
from apify_client import ApifyClientAsync
24-
from crawlee import Request
24+
25+
from apify import Request
2526

2627
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
2728
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')

src/apify/scrapy/extensions/_httpcache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
from scrapy.responsetypes import responsetypes
1414

1515
from apify import Configuration
16-
from apify.apify_storage_client import ApifyStorageClient
1716
from apify.scrapy._async_thread import AsyncThread
17+
from apify.storage_clients import ApifyStorageClient
1818
from apify.storages import KeyValueStore
1919

2020
if TYPE_CHECKING:

src/apify/scrapy/requests.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
from scrapy.http.headers import Headers
1111
from scrapy.utils.request import request_from_dict
1212

13-
from crawlee import Request as ApifyRequest
1413
from crawlee._types import HttpHeaders
1514

15+
from apify import Request as ApifyRequest
16+
1617
logger = getLogger(__name__)
1718

1819

src/apify/scrapy/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from ._async_thread import AsyncThread
1212
from .requests import to_apify_request, to_scrapy_request
1313
from apify import Configuration
14-
from apify.apify_storage_client import ApifyStorageClient
14+
from apify.storage_clients import ApifyStorageClient
1515
from apify.storages import RequestQueue
1616

1717
if TYPE_CHECKING:

src/apify/storage_clients/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from crawlee.storage_clients import FileSystemStorageClient, MemoryStorageClient, StorageClient
2+
3+
from ._apify import ApifyStorageClient
4+
5+
__all__ = [
6+
'ApifyStorageClient',
7+
'FileSystemStorageClient',
8+
'MemoryStorageClient',
9+
'StorageClient',
10+
]

src/apify/apify_storage_client/_dataset_client.py renamed to src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
from datetime import datetime
1616

1717
from apify_client.clients import DatasetClientAsync
18-
19-
from apify import Configuration
18+
from crawlee.configuration import Configuration
2019

2120
logger = getLogger(__name__)
2221

@@ -68,8 +67,13 @@ async def open(
6867
name: str | None,
6968
configuration: Configuration,
7069
) -> ApifyDatasetClient:
71-
token = configuration.token
72-
api_url = configuration.api_base_url
70+
token = getattr(configuration, 'token', None)
71+
if not token:
72+
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
73+
74+
api_url = getattr(configuration, 'api_base_url', None)
75+
if not api_url:
76+
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
7377

7478
# Otherwise, create a new one.
7579
apify_client_async = ApifyClientAsync(
@@ -100,7 +104,8 @@ async def open(
100104

101105
@override
102106
async def purge(self) -> None:
103-
# TODO: better
107+
# TODO: better?
108+
# https://github.com/apify/apify-sdk-python/issues/469
104109
async with self._lock:
105110
await self._api_client.delete()
106111

src/apify/apify_storage_client/_key_value_store_client.py renamed to src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919
from datetime import datetime
2020

2121
from apify_client.clients import KeyValueStoreClientAsync
22-
23-
from apify import Configuration
22+
from crawlee.configuration import Configuration
2423

2524
logger = getLogger(__name__)
2625

@@ -70,8 +69,13 @@ async def open(
7069
name: str | None,
7170
configuration: Configuration,
7271
) -> ApifyKeyValueStoreClient:
73-
token = configuration.token
74-
api_url = configuration.api_base_url
72+
token = getattr(configuration, 'token', None)
73+
if not token:
74+
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
75+
76+
api_url = getattr(configuration, 'api_base_url', None)
77+
if not api_url:
78+
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
7579

7680
# Otherwise, create a new one.
7781
apify_client_async = ApifyClientAsync(
@@ -101,7 +105,8 @@ async def open(
101105

102106
@override
103107
async def purge(self) -> None:
104-
# TODO: better
108+
# TODO: better?
109+
# https://github.com/apify/apify-sdk-python/issues/469
105110
async with self._lock:
106111
await self._api_client.delete()
107112

@@ -147,7 +152,13 @@ async def iterate_keys(
147152
list_key_page = KeyValueStoreListKeysPage.model_validate(response)
148153

149154
for item in list_key_page.items:
150-
yield item
155+
# Convert KeyValueStoreKeyInfo to KeyValueStoreRecordMetadata
156+
record_metadata = KeyValueStoreRecordMetadata(
157+
key=item.key,
158+
size=item.size,
159+
content_type='application/octet-stream', # Content type not available from list_keys
160+
)
161+
yield record_metadata
151162
count += 1
152163

153164
# If we've reached the limit, stop yielding

0 commit comments

Comments
 (0)