Skip to content

Commit a01e35b

Browse files
committed
[huggingface_hub] docstrings + version bump
1 parent 33d9ef6 commit a01e35b

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

metaflow_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,41 @@ def snapshot_download(self, **kwargs) -> dict:
144144

145145

146146
class HuggingfaceLoadedModels:
147+
"""Manages loaded Hugging Face models and provides access to their local paths.
148+
149+
This class is accessed through `current.huggingface_hub.loaded` and provides a dictionary-like
150+
interface to access the local paths of the Hugging Face repos specified in the `load` argument of the `@huggingface_hub` decorator.
151+
152+
Usage:
153+
------
154+
```python
155+
# Basic loading and access
156+
@huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
157+
@step
158+
def my_step(self):
159+
# Access the local path of a loaded model
160+
model_path = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
161+
162+
# Check if a model is loaded
163+
if "mistralai/Mistral-7B-Instruct-v0.1" in current.huggingface_hub.loaded:
164+
print("Model is loaded!")
165+
166+
# Custom path and advanced loading
167+
@huggingface_hub(load=[
168+
("mistralai/Mistral-7B-Instruct-v0.1", "/custom/path"), # Specify custom path
169+
{
170+
"repo_id": "org/model-name",
171+
"force_download": True, # Force fresh download
172+
"repo_type": "dataset" # Load dataset instead of model
173+
}
174+
])
175+
@step
176+
def another_step(self):
177+
# Models are available at specified paths
178+
pass
179+
```
180+
"""
181+
147182
def __init__(
148183
self, checkpointer: "HuggingfaceRegistry", logger, temp_dir_root=None
149184
) -> None:
@@ -279,7 +314,18 @@ class HuggingfaceHubDecorator(CheckpointDecorator):
279314
The root directory that will hold the temporary directory where objects will be downloaded.
280315
281316
load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
282-
The list of models to load.
317+
The list of repos (models/datasets) to load.
318+
319+
Loaded repos can be accessed via `current.huggingface_hub.loaded`. If `load` is set, the following happens:
320+
321+
- If the repo (model/dataset) is not found in the datastore:
322+
- Downloads the repo from the Hugging Face Hub to a temporary directory (or to the specified path) for local access
323+
- Stores it in Metaflow's datastore (S3, GCS, Azure, etc.) with a unique name based on repo_type/repo_id
324+
- All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
325+
326+
- If the repo is found in the datastore:
327+
- Loads it directly from the datastore to a local path (either a temporary directory or the specified path)
328+
283329
284330
MF Add To Current
285331
-----------------

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup, find_namespace_packages
22

3-
version = "0.1.3"
3+
version = "0.1.4"
44

55

66
def get_long_description() -> str:

0 commit comments

Comments
 (0)