Skip to content

Commit 942290e

Browse files
committed
Adapt to Crawlee v1
1 parent 82efd3e commit 942290e

19 files changed

+98
-106
lines changed

docs/03_concepts/code/03_dataset_exports.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ async def main() -> None:
1111
await dataset.export_to(
1212
content_type='csv',
1313
key='data.csv',
14-
to_key_value_store_name='my-cool-key-value-store',
14+
to_kvs_name='my-cool-key-value-store',
1515
)
1616

1717
# Export the data as JSON
1818
await dataset.export_to(
1919
content_type='json',
2020
key='data.json',
21-
to_key_value_store_name='my-cool-key-value-store',
21+
to_kvs_name='my-cool-key-value-store',
2222
)
2323

2424
# Print the exported records

docs/03_concepts/code/conditional_actor_charge.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ async def main() -> None:
66
# Check the dataset because there might already be items
77
# if the run migrated or was restarted
88
default_dataset = await Actor.open_dataset()
9-
dataset_info = await default_dataset.get_info()
10-
charged_items = dataset_info.item_count if dataset_info else 0
9+
charged_items = default_dataset.metadata.item_count
1110

1211
# highlight-start
1312
if Actor.get_charging_manager().get_pricing_info().is_pay_per_event:

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ keywords = [
3636
dependencies = [
3737
"apify-client>=1.9.2",
3838
"apify-shared>=1.3.0",
39-
"crawlee~=0.6.0",
39+
"crawlee@git+https://github.com/apify/crawlee-python.git@new-storage-clients",
4040
"cryptography>=42.0.0",
4141
"httpx>=0.27.0",
4242
# TODO: ensure compatibility with the latest version of lazy-object-proxy
@@ -78,6 +78,9 @@ dev = [
7878
[tool.hatch.build.targets.wheel]
7979
packages = ["src/apify"]
8080

81+
[tool.hatch.metadata]
82+
allow-direct-references = true
83+
8184
[tool.ruff]
8285
line-length = 120
8386
include = ["src/**/*.py", "tests/**/*.py", "docs/**/*.py", "website/**/*.py"]

src/apify/_actor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def __init__(
8888

8989
# Create an instance of the cloud storage client, the local storage client is obtained
9090
# from the service locator.
91-
self._cloud_storage_client = ApifyStorageClient.from_config(config=self._configuration)
91+
self._cloud_storage_client = ApifyStorageClient()
9292

9393
# Set the event manager based on whether the Actor is running on the platform or locally.
9494
self._event_manager = (

src/apify/apify_storage_client/_dataset_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ def __init__(
5454
self._lock = asyncio.Lock()
5555
"""A lock to ensure that only one operation is performed at a time."""
5656

57-
@override
5857
@property
58+
@override
5959
def metadata(self) -> DatasetMetadata:
6060
return self._metadata
6161

src/apify/apify_storage_client/_key_value_store_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from apify_client import ApifyClientAsync
1111
from crawlee.storage_clients._base import KeyValueStoreClient
1212
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
13-
from ._models import KeyValueStoreListKeysPage
1413

14+
from ._models import KeyValueStoreListKeysPage
1515
from apify._crypto import create_hmac_signature
1616

1717
if TYPE_CHECKING:
@@ -56,8 +56,8 @@ def __init__(
5656
self._lock = asyncio.Lock()
5757
"""A lock to ensure that only one operation is performed at a time."""
5858

59-
@override
6059
@property
60+
@override
6161
def metadata(self) -> KeyValueStoreMetadata:
6262
return self._metadata
6363

src/apify/apify_storage_client/_request_queue_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ def __init__(
8686
self._should_check_for_forefront_requests = False
8787
"""Whether to check for forefront requests in the next list_head call."""
8888

89-
@override
9089
@property
90+
@override
9191
def metadata(self) -> RequestQueueMetadata:
9292
return self._metadata
9393

src/apify/scrapy/extensions/_httpcache.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,14 @@ def open_spider(self, spider: Spider) -> None:
5151
kvs_name = get_kvs_name(spider.name)
5252

5353
async def open_kvs() -> KeyValueStore:
54-
config = Configuration.get_global_configuration()
55-
if config.is_at_home:
56-
storage_client = ApifyStorageClient.from_config(config)
57-
return await KeyValueStore.open(name=kvs_name, storage_client=storage_client)
54+
configuration = Configuration.get_global_configuration()
55+
if configuration.is_at_home:
56+
storage_client = ApifyStorageClient()
57+
return await KeyValueStore.open(
58+
name=kvs_name,
59+
configuration=configuration,
60+
storage_client=storage_client,
61+
)
5862
return await KeyValueStore.open(name=kvs_name)
5963

6064
logger.debug("Starting background thread for cache storage's event loop")

src/apify/scrapy/scheduler.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,13 @@ def open(self, spider: Spider) -> Deferred[None] | None:
4949
self.spider = spider
5050

5151
async def open_rq() -> RequestQueue:
52-
config = Configuration.get_global_configuration()
53-
if config.is_at_home:
54-
storage_client = ApifyStorageClient.from_config(config)
55-
return await RequestQueue.open(storage_client=storage_client)
52+
configuration = Configuration.get_global_configuration()
53+
if configuration.is_at_home:
54+
storage_client = ApifyStorageClient()
55+
return await RequestQueue.open(
56+
configuration=configuration,
57+
storage_client=storage_client,
58+
)
5659
return await RequestQueue.open()
5760

5861
try:

tests/integration/conftest.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from apify_client import ApifyClient, ApifyClientAsync
1616
from apify_shared.consts import ActorJobStatus, ActorSourceType, ApifyEnvVars
1717
from crawlee import service_locator
18-
from crawlee.storages import _creation_management
18+
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
1919

2020
import apify._actor
2121
from ._utils import generate_unique_resource_name
@@ -65,12 +65,15 @@ def _prepare_test_env() -> None:
6565
service_locator._storage_client = None
6666

6767
# Clear creation-related caches to ensure no state is carried over between tests.
68-
monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {})
69-
monkeypatch.setattr(_creation_management, '_cache_dataset_by_name', {})
70-
monkeypatch.setattr(_creation_management, '_cache_kvs_by_id', {})
71-
monkeypatch.setattr(_creation_management, '_cache_kvs_by_name', {})
72-
monkeypatch.setattr(_creation_management, '_cache_rq_by_id', {})
73-
monkeypatch.setattr(_creation_management, '_cache_rq_by_name', {})
68+
Dataset._cache_by_id.clear()
69+
Dataset._cache_by_name.clear()
70+
Dataset._default_instance = None
71+
KeyValueStore._cache_by_id.clear()
72+
KeyValueStore._cache_by_name.clear()
73+
KeyValueStore._default_instance = None
74+
RequestQueue._cache_by_id.clear()
75+
RequestQueue._cache_by_name.clear()
76+
RequestQueue._default_instance = None
7477

7578
# Verify that the test environment was set up correctly.
7679
assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path)

0 commit comments

Comments (0)