@@ -144,6 +144,41 @@ def snapshot_download(self, **kwargs) -> dict:
144144
145145
146146class HuggingfaceLoadedModels :
147+ """Manages loaded Hugging Face models and provides access to their local paths.
148+
149+ This class is accessed through `current.huggingface_hub.loaded` and provides a dictionary-like
150+ interface to the local paths of the Hugging Face repos specified in the `load` argument of the `@huggingface_hub` decorator.
151+
152+ Usage:
153+ ------
154+ ```python
155+ # Basic loading and access
156+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
157+ @step
158+ def my_step(self):
159+ # Access the local path of a loaded model
160+ model_path = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
161+
162+ # Check if a model is loaded
163+ if "mistralai/Mistral-7B-Instruct-v0.1" in current.huggingface_hub.loaded:
164+ print("Model is loaded!")
165+
166+ # Custom path and advanced loading
167+ @huggingface_hub(load=[
168+ ("mistralai/Mistral-7B-Instruct-v0.1", "/custom/path"), # Specify custom path
169+ {
170+ "repo_id": "org/model-name",
171+ "force_download": True, # Force fresh download
172+ "repo_type": "dataset" # Load dataset instead of model
173+ }
174+ ])
175+ @step
176+ def another_step(self):
177+ # Models are available at specified paths
178+ pass
179+ ```
180+ """
181+
147182 def __init__ (
148183 self , checkpointer : "HuggingfaceRegistry" , logger , temp_dir_root = None
149184 ) -> None :
@@ -279,7 +314,18 @@ class HuggingfaceHubDecorator(CheckpointDecorator):
279314 The root directory that will hold the temporary directory where objects will be downloaded.
280315
281316 load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
282- The list of models to load.
317+ The list of repos (models/datasets) to load.
318+
319+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If `load` is set, the following happens for each repo:
320+
321+ - If the repo (model/dataset) is not found in the datastore:
322+ - Downloads the repo from the Hugging Face Hub to a temporary directory (or to the specified path) for local access
323+ - Stores it in Metaflow's datastore (S3/GCS/Azure, etc.) with a unique name based on repo_type/repo_id
324+ - All Hugging Face models loaded for a `@step` are cached separately under flow/step/namespace.
325+
326+ - If the repo is found in the datastore:
327+ - Loads it directly from the datastore to a local path (either a temporary directory or the specified path)
328+
283329
284330 MF Add To Current
285331 -----------------
0 commit comments