diff --git a/.env.template b/.env.template
index ff6a4cb..d41cd68 100644
--- a/.env.template
+++ b/.env.template
@@ -1,3 +1,4 @@
 OPENAI_API_KEY=sk-proj-
 HF_TOKEN=hf_B-
 WANDB_API_KEY=
+PYTHONPATH=.
diff --git a/.gitattributes b/.gitattributes
index 5948bbd..faf1212 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,40 @@
 # Ignore Jupyter Notebooks from Github Linguist Stats
 *.ipynb linguist-vendored
+
+# Ignore Large File Storage objects
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index c029eef..8a83fe9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,8 +167,6 @@ cython_debug/
 # Data
 /data
 /temp
-*.parquet
-*.csv
 
 # Write up
 *pdf/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 69559ab..2511889 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,7 +10,26 @@ repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
     hooks:
+      - id: check-added-large-files
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-toml
       - id: check-yaml
       - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
       - id: trailing-whitespace
-      - id: check-added-large-files
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.13.0
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          [
+            "types-python-slugify",
+            "types-requests",
+            "types-PyYAML",
+            "types-pytz",
+          ]
diff --git a/README.md b/README.md
index 7a7001e..722efc0 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Otherwise, you can try the setup script:
 bash setup.sh
 ```
 
-It attempts to install with [uv](https://docs.astral.sh/uv/) (a fast, Rust-based Python package and project manager) using `.python-version` file and `pyproject.toml` file. This is the recommended way to manage the project, since its resolver is faster and more reliable than `pip`.
+It attempts to install with [uv](https://docs.astral.sh/uv/) (a fast, Rust-based Python package and project manager) using the `pyproject.toml` file. This is the recommended way to manage the project, since uv's dependency resolver is faster and more reliable than `pip`'s. Otherwise, the script falls back to a `pip` installation.
diff --git a/pyproject.toml b/pyproject.toml
index bab8427..307081d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,9 +5,11 @@ description = "Generate mnemonic sentences for English words"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
+    "accelerate>=1.0.1",
     "datasets", # Hugging Face datasets
     "evaluate", # HF evaluation
-    "gradio>=4.26.0", # Web app
+    "gradio>=4.26.0", # Web app
+    "hf-transfer>=0.1.8", # Faster transfers with HF Hub
     "numpy<2.0.0", # Wait for other packages to update
     "openai>=1.57.0",
     "peft", # HF parameter-efficient training
@@ -15,8 +17,9 @@ dependencies = [
     "python-dotenv>=1.0.1", # Load environment variables
     "pyyaml>=6.0.2", # YAML config
     "ruff>=0.7.1",
+    "spaces>=0.31.0",
     "tenacity>=9.0.0", # Retry (e.g. API calls)
-    "torch>=2.5.1", # PyTorch
+    "torch>=2.4.0", # PyTorch
     "tqdm>=4.67.1", # Progress bar
     "transformers", # HF transformers
     "trl", # HF transformer reinforcement learning
diff --git a/requirements.txt b/requirements.txt
index 33df715..11d5754 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -98,6 +98,7 @@ gitdb==4.0.11
 gitpython==3.1.43
     # via wandb
 gradio==4.26.0
+    # via spaces
 gradio-client==0.15.1
     # via gradio
 h11==0.14.0
@@ -111,6 +112,8 @@ httpx==0.27.2
     #   gradio
     #   gradio-client
     #   openai
+    #   safehttpx
+    #   spaces
 huggingface-hub==0.26.1
     # via
     #   accelerate
@@ -196,6 +199,7 @@ packaging==24.1
     #   huggingface-hub
     #   matplotlib
     #   peft
+    #   spaces
     #   transformers
 pandas==2.2.3
     # via
@@ -220,6 +224,7 @@ psutil==5.9.8
     # via
     #   accelerate
     #   peft
+    #   spaces
     #   wandb
 pyarrow==18.1.0
     # via datasets
@@ -230,6 +235,7 @@ pydantic==2.9.2
     #   fastapi
     #   gradio
     #   openai
+    #   spaces
     #   wandb
 pydantic-core==2.23.4
     # via pydantic
@@ -269,6 +275,7 @@ requests==2.31.0
     #   datasets
     #   evaluate
     #   huggingface-hub
+    #   spaces
     #   transformers
     #   wandb
 rich==13.9.3
@@ -281,6 +288,7 @@ rpds-py==0.22.3
     #   referencing
 ruff==0.8.2
     # via gradio
+safehttpx==0.1.6
 safetensors==0.4.5
     # via
     #   accelerate
@@ -309,6 +317,7 @@ sniffio==1.3.1
     #   anyio
     #   httpx
     #   openai
+spaces==0.31.0
 starlette==0.41.2
     # via fastapi
 sympy==1.13.1
@@ -353,6 +362,7 @@ typing-extensions==4.12.2
     #   pydantic
     #   pydantic-core
     #   rich
+    #   spaces
     #   torch
     #   typeguard
     #   typer
diff --git a/src/app/README.md b/src/app/README.md
new file mode 100644
index 0000000..47099eb
--- /dev/null
+++ b/src/app/README.md
@@ -0,0 +1,14 @@
+---
+title: Gemma 2 9B IT
+emoji: 😻
+colorFrom: indigo
+colorTo: pink
+sdk: gradio
+sdk_version: 5.8.0
+python_version: 3.10
+app_file: app.py
+pinned: false
+short_description: Chatbot
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/src/app/app.py b/src/app/app.py
new file mode 100644
index 0000000..df6975e
--- /dev/null
+++ b/src/app/app.py
@@ -0,0 +1,162 @@
+"""Chat interface demo for the Google Gemma 2 9B IT model.
+
+Cloned and adapted from the demo: https://huggingface.co/spaces/huggingface-projects/gemma-2-9b-it/tree/main/app.py
+"""
+
+import os
+from threading import Thread
+from typing import Iterator
+
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+DESCRIPTION = """
+This is a demo for the Google Gemma 2 9B IT model.
+Use it to generate mnemonics for English words you want to learn and remember.
+Input your instructions or start with one of the examples provided. The input supports a subset of Markdown formatting, such as bold, italics, code, and tables.
+"""
+
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+model_id = "google/gemma-2-9b-it"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model.config.sliding_window = 4096
+model.eval()
+
+
+@spaces.GPU(duration=90)
+def generate(
+    message: str,
+    chat_history: list[dict],
+    max_new_tokens: int = 1024,
+    temperature: float = 0.6,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+) -> Iterator[str]:
+    """Generate a response to a message using the model.
+
+    Args:
+        message: The message to respond to.
+        chat_history: The conversation history.
+        max_new_tokens: The maximum number of tokens to generate.
+        temperature: The temperature for sampling.
+        top_p: The top-p value for nucleus sampling.
+        top_k: The top-k value for sampling.
+        repetition_penalty: The repetition penalty.
+
+    Yields:
+        Iterator[str]: The generated response.
+    """
+    conversation = chat_history.copy()
+    conversation.append({"role": "user", "content": message})
+
+    input_ids = tokenizer.apply_chat_template(
+        conversation, add_generation_prompt=True, return_tensors="pt"
+    )
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(
+            f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
+        )
+    input_ids = input_ids.to(model.device)
+
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+    )
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        yield "".join(outputs)
+
+
+chat_interface = gr.ChatInterface(
+    fn=generate,
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    stop_btn=True,
+    examples=[
+        [
+            "Produce a cue to help me learn and retrieve the meaning of this word whenever I look at it (and nothing else): preposterous"
+        ],
+        [
+            "Create a cue that elicits a vivid mental image for the word 'observient' so I can remember its meaning."
+        ],
+        [
+            "I need a mnemonic for 'dilapidated' to learn its meaning and contextual usage."
+        ],
+        [
+            "Help me remember the meaning of 'encapsulate' by connecting it to its etymology or related words."
+        ],
+    ],
+    cache_examples=False,
+    type="messages",
+)
+
+with gr.Blocks(css_paths="style.css", fill_height=True) as demo:
+    gr.Markdown(DESCRIPTION)
+    chat_interface.render()
+    gr.ClearButton(elem_id="clear-button")
+
+
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
diff --git a/src/app/app2.py b/src/app/app2.py
new file mode 100644
index 0000000..0d67a78
--- /dev/null
+++ b/src/app/app2.py
@@ -0,0 +1,70 @@
+"""Gradio interface for generating mnemonics from instructions.
+
+TODO: Combine this interface with the chatbot interface in app.py.
+"""
+
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "chiffonng/gemma2-9b-it-mnemonics"
+
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+
+def generate_text(instruction: str) -> str:
+    """Generate mnemonic from user input/instruction.
+
+    Args:
+        instruction (str): User instructions to generate mnemonic.
+
+    Returns:
+        str: Generated mnemonic text.
+    """
+    inputs = tokenizer.encode(instruction, return_tensors="pt")
+    outputs = model.generate(inputs, max_length=256)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+# Create simple Gradio interface
+demo = gr.Interface(
+    fn=generate_text,
+    inputs=gr.Textbox(label="Instruction"),
+    outputs=gr.Textbox(label="Output"),
+    title="Mnemonic Generation",
+    description="Enter an instruction to generate mnemonic text.",
+)
+
+
+def chatbot_response(message: str, history: list) -> str:
+    """Generate a response from the chatbot based on the input message.
+
+    Args:
+        message (str): The input message from the user.
+        history (list): The conversation history, managed and passed in by Gradio.
+
+    Returns:
+        str: The model's response to the message.
+ """ + inputs = tokenizer.encode(message, return_tensors="pt") + outputs = model.generate(inputs, max_length=100) + response = tokenizer.decode(outputs[0], skip_special_tokens=True) + history.append((message, response)) + return history + + +# Create Gradio ChatInterface +chatbot = gr.ChatInterface( + fn=chatbot_response, + title="Mnemonic Generation Chatbot", + description="Chat with the model to generate mnemonics.", + retry_btn=True, + undo_btn=True, + clear_btn=True, +) + + +# Launch the interface +demo.launch() +# chatbot.launch() diff --git a/src/data/data_loaders.py b/src/data/data_loaders.py index 18d71b7..a2bd99f 100644 --- a/src/data/data_loaders.py +++ b/src/data/data_loaders.py @@ -6,6 +6,8 @@ from datasets import ClassLabel, DatasetDict, load_dataset if TYPE_CHECKING: + from typing import Optional + from datasets import Dataset import utils.constants as c @@ -59,7 +61,7 @@ def load_local_dataset(file_path: PathLike, **kwargs) -> "Dataset": def load_hf_dataset( - repo_id: str = None, + repo_id: Optional[str] = None, to_csv: bool = False, file_path: PathLike = None, **kwargs, @@ -78,7 +80,7 @@ def load_hf_dataset( login_hf_hub() if repo_id is None: - repo_id = c.HF_DATASET_REPO + repo_id = c.HF_DATASET_NAME logger.info(f"Loading dataset from {repo_id}.") dataset = load_dataset(repo_id, **kwargs) diff --git a/src/data/mnemonic_processing.py b/src/data/mnemonic_processing.py index 5706a97..46253cb 100644 --- a/src/data/mnemonic_processing.py +++ b/src/data/mnemonic_processing.py @@ -2,6 +2,7 @@ import logging from pathlib import Path +from typing import TYPE_CHECKING, no_type_check from warnings import warn import pandas as pd @@ -21,6 +22,10 @@ from typing_extensions import Annotated from yaml import safe_load +if TYPE_CHECKING: + from openai import Response + +from utils.aliases import PathLike from utils.constants import ( CLASSIFIED_DATASET_CSV, CLASSIFIED_DATASET_PARQUET, @@ -46,7 +51,7 @@ client = OpenAI() # Load config and prompts -with Path.open("config/classify_mnemonics.yaml", "r") as f: +with Path("config/classify_mnemonics.yaml").open("r") as f: classification_conf = safe_load(f) # dict of config batch_size = classification_conf["batch_size"] @@ -66,20 +71,20 @@ class ClassificationSchema(BaseModel): classifications: list[ValidClassification] -def combine_key_value(path: str) -> list[str]: +def combine_key_value(path: PathLike) -> list[str]: """Load 2-column data from a file, to format: key: value. Args: - path (str): The path to the file containing the 2-column data. + path (PathLike): The path to the file containing the 2-column data. Returns: combined_col (list[str]): The combined key and value columns. """ - path = check_file_path(path, extensions=[PARQUET_EXT, CSV_EXT]) + path_obj: Path = check_file_path(path, extensions=[PARQUET_EXT, CSV_EXT]) - if path.suffix == PARQUET_EXT: + if path_obj.suffix == PARQUET_EXT: df = pd.read_parquet(path, engine="pyarrow") - elif path.suffix == CSV_EXT: + elif path_obj.suffix == CSV_EXT: df = pd.read_csv(path, header="infer", quotechar='"') logger.info(f"Read {df.shape[0]} rows from {str(path)}.") @@ -137,7 +142,7 @@ def create_batches(data: list[str], batch_size=batch_size) -> list[str]: before=before_log(logger, logging.WARNING), after=after_log(logger, logging.WARNING), ) -def classify_mnemonics_api(batches: list[str]): +def classify_mnemonics_api(batches: str | list[str]): """Classify mnemonics using OpenAI's API, GPT-4o mini and return the responses as JSON array of numbers. Retry up to 3 times if rate limited. 
     Args:
@@ -182,7 +184,7 @@ def get_structured_response(
     batch: str,
     model_config: dict,
     response_format: BaseModel = ClassificationSchema,
-):
+) -> Any:
     """Get response from OpenAI API. Documentation: https://platform.openai.com/docs/guides/structured-outputs/how-to-use.
 
     Args:
@@ -192,7 +194,7 @@ def get_structured_response(
         response_format (BaseModel, optional): The response format. Defaults to ClassificationSchema.
 
     Returns:
-        structure_msg (message object from OpenAI's Response object): A structured message object.
+        structure_msg (Any): The structured message object parsed from the OpenAI response.
     """
     try:
         structure_msg = (
@@ -202,7 +204,7 @@ def get_structured_response(
                     {"role": "system", "content": model_config["prompts"]["system"]},
                     {
                         "role": "user",
-                        "content": f"{model_config["prompts"]["user"]}{batch}",
+                        "content": model_config["prompts"]["user"] + batch,
                     },
                 ],
                 max_tokens=batch_size * 3 + 1,  # 3 tokens per mnemonic
@@ -230,18 +232,21 @@ def get_structured_response(
         raise e
 
 
+@no_type_check
 def parse_structured_response(
-    structure_msg: object, batch: str, batch_index: int
-) -> list[int | str]:
+    structure_msg: object,
+    batch: str,
+    batch_index: int,
+) -> list[int]:
     """Parse the structured message from OpenAI's API.
 
     Args:
-        structure_msg (message object from OpenAI's Response object): A structured message object.
+        structure_msg: The structured message object parsed from the OpenAI response.
         batch (str): The batch of mnemonics.
         batch_index (int): The index of the batch.
 
     Returns:
-        classification_batch_i (list[int|str]): The list of parsed categories.
+        list[int]: The list of parsed categories.
     """
     try:
         if structure_msg.parsed:
@@ -286,14 +291,14 @@
 def save_structured_outputs(
-    outputs: list[ValidClassification], input_path: str | Path, output_path: str | Path
+    outputs: list[ValidClassification], input_path: PathLike, output_path: PathLike
 ):
     """Save the classification results to an existing file of mnemonics.
 
     Args:
         outputs (list[ValidClassification]): The list of parsed categories.
-        input_path (str | Path): The path to the file containing the mnemonics.
-        output_path (str | Path): The path to .csv or .parquet file to write the parsed.
+        input_path (PathLike): The path to the file containing the mnemonics.
+        output_path (PathLike): The path to the .csv or .parquet file to write the parsed results to.
 
     Raises:
         ValueError: If the output file is not in parquet or csv format.
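For reference, `get_structured_response` above wraps the OpenAI SDK's Pydantic-backed structured-outputs helper. The sketch below shows that pattern in isolation; the prompts, model name, and single-item batch are placeholders rather than the project's YAML-driven config, and the schema is simplified to plain ints.

```python
# Hedged sketch of the structured-outputs pattern; not the project's code.
from openai import OpenAI
from pydantic import BaseModel


class ClassificationSchema(BaseModel):
    """Simplified schema: one category per mnemonic in the batch."""

    classifications: list[int]


client = OpenAI()
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "Classify each mnemonic from 0 to 3."},
        {"role": "user", "content": "1. preposterous: 'pre' and 'post' mixed up"},
    ],
    response_format=ClassificationSchema,
)
structure_msg = completion.choices[0].message
if structure_msg.parsed:  # output validated against the Pydantic schema
    print(structure_msg.parsed.classifications)
elif structure_msg.refusal:  # the model declined to answer
    print(structure_msg.refusal)
```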
diff --git a/src/utils/constants.py b/src/utils/constants.py
index 114a900..5993da8 100644
--- a/src/utils/constants.py
+++ b/src/utils/constants.py
@@ -24,8 +24,9 @@
 CATEGORY_NAMES = ["unsure", "shallow-encoding", "deep-encoding", "mixed"]
 CATEGORY_DICT = {name: i for i, name in enumerate(CATEGORY_NAMES)}
 
-# Hugging Face datasets
-HF_DATASET_REPO = "chiffonng/mnemonic-sft"  # <user>/<dataset>
+# Hugging Face collection
+HF_DATASET_NAME = "chiffonng/en-vocab-mnemonics"  # <user>/<dataset>
+HF_MODEL_NAME = "chiffonng/gemma2-9b-it-mnemonics"  # <user>/<model>
 
 # Model paths
-CHECKPOINT_DIR = "ckpt"
+OUTPUT_DIR = "output"
diff --git a/src/utils/error_handling.py b/src/utils/error_handling.py
index 3fc1761..458366d 100644
--- a/src/utils/error_handling.py
+++ b/src/utils/error_handling.py
@@ -2,7 +2,7 @@
 
 from enum import Enum
 from pathlib import Path
-from typing import TypeAlias
+from typing import Optional, TypeAlias
 from warnings import warn
 
 from utils.aliases import ExtensionsType, PathLike
@@ -51,7 +51,7 @@ def validate_and_normalize_extensions(extensions: ExtensionsType) -> list[str]:
     return extensions
 
 
-def check_extension(path: Path, extensions: list[str]):
+def check_extension(path: Path, extensions: ExtensionsType) -> None:
     """Check if the path has one of the allowed extensions."""
     if extensions and path.suffix not in extensions:
         raise ValueError(
@@ -92,7 +92,7 @@ def check_file_path(
 def check_dir_path(
     dir_path: PathLike,
     new_ok: bool = False,
-    extensions: list[str] = None,
+    extensions: Optional[list[str]] = None,
 ) -> Path | list[Path]:
     """Check if the directory path exists, convert it to a Path object if it is a string, and return it. Optionally, check if the directory contains files with the specified extensions.
@@ -129,21 +129,21 @@
 def which_file_exists(
-    *files: list[Path] | list[str], extensions: ExtensionsType = None
+    *files: PathLike, extensions: Optional[ExtensionsType] = None
 ) -> Path:
     """Return the first file found in the list of files. Optionally, return the first file with the specified extensions.
 
     Args:
-        files (list[Path] | list[str]): The list of files to check.
+        *files (PathLike): The files to check.
         extensions (list[str], optional): A list of allowed file extensions. Defaults to [].
 
     Returns:
-        file (Path): The first file found in the list.
+        file_path (Path): The first file found in the list.
     """
     for file in files:
-        file = check_file_path(file, new_ok=True, extensions=extensions)
-        if file.exists():
-            return file
+        file_path: Path = check_file_path(file, new_ok=True, extensions=extensions)
+        if file_path.exists():
+            return file_path
 
     raise FileNotFoundError(
         f"None of the specified files were found: {[str(p) for p in files]}."
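A quick usage sketch for the retyped helper: `which_file_exists` takes candidate paths as varargs and returns the first that exists. The snippet below is illustrative only; it assumes `CLASSIFIED_DATASET_PARQUET` and `CLASSIFIED_DATASET_CSV` (imported by `mnemonic_processing.py` above) are path-like constants, which this diff does not show.

```python
# Illustrative only: resolve whichever classified dataset file exists first.
from pathlib import Path

from utils.constants import CLASSIFIED_DATASET_CSV, CLASSIFIED_DATASET_PARQUET
from utils.error_handling import which_file_exists

dataset_path: Path = which_file_exists(
    CLASSIFIED_DATASET_PARQUET,  # preferred format, checked first
    CLASSIFIED_DATASET_CSV,      # fallback
    extensions=[".parquet", ".csv"],
)
print(f"Using dataset at {dataset_path}")
```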
diff --git a/uv.lock b/uv.lock index af1e6e4..ea03586 100644 --- a/uv.lock +++ b/uv.lock @@ -1006,6 +1006,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "hf-transfer" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/0e/ba51e31148f0a9bc8d44878086535c2dc6d9a8dce321250e9bcdd3c110ea/hf_transfer-0.1.8.tar.gz", hash = "sha256:26d229468152e7a3ec12664cac86b8c2800695fd85f9c9a96677a775cc04f0b3", size = 23595 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/eb/469e68c4259c4f4ad8e00967ad2f72ff1ba5e2712b4e1093e3e03c5cbc3d/hf_transfer-0.1.8-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:70858f9e94286738ed300484a45beb5cfee6a7ddac4c5886f9c6fce7823ac5ab", size = 1422386 }, + { url = "https://files.pythonhosted.org/packages/bd/3d/5e8966b47aa86cd50f2017c76c2634aa09a437224567f379bc28d6580d7c/hf_transfer-0.1.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:38adc73f0a8526319d90f7cc5dc2d5e4bb66f487a513d94b98aa6725be732e4a", size = 1406027 }, + { url = "https://files.pythonhosted.org/packages/61/e0/fd5f849ed7b2bf9b2bb008f3df3ee5a8773ca98362302833708cce26c337/hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44d2f0c08198d8d899fe9d66e86aee2dd844bd7ce33888f261373fcec81d2a54", size = 3781136 }, + { url = "https://files.pythonhosted.org/packages/d5/e9/fad10fb8b04c91cb8775b850f2bc578a1fb6168e2ab2b04ebb8525466159/hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1de2a4ef36f9e60b3d3bec00193c0aafd75771709f2ca51b9b162373f5af3d32", size = 3099910 }, + { url = "https://files.pythonhosted.org/packages/8c/ae/8a608949a87280ed14f0f5e0adbeccab54a7ea3d3aabdf77ec38544dd44f/hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e319269e3606a5ff2979296841766649ac73598a4a8eee2a968f86c8071fea5a", size = 3589277 }, + { url = "https://files.pythonhosted.org/packages/81/ca/855ea35c9f997b500acd1baf6d6920ead00a0b7a8fccdcac74fe7e4f66d9/hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f6026cf3be6a53ea42f92172f60c1c0675baaa9073f865e671b661dde5fd157", size = 3409983 }, + { url = "https://files.pythonhosted.org/packages/5e/89/863f333b49603cc8d3c8862a428cc8fbaa9388ac8f076e9fa5ef3e729c3c/hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f865c33ada5bd3650c2b46e59979f2d7755c3f517f8d0facc78576a0c7d26406", size = 3562732 }, + { url = "https://files.pythonhosted.org/packages/95/93/8137b83bd4ca6b1b4dab36e42af8c19d62c98ff8837306429547a92cbde0/hf_transfer-0.1.8-cp310-none-win32.whl", hash = "sha256:2054730e8d8ed21917c64be7199e06424b2bd08df1c43a72766afaed7992f2d3", size = 1129924 }, + { url = "https://files.pythonhosted.org/packages/da/36/7583964f7cb0671071488f358dd388a8ef21f3a9bfe2e3596dac199010fc/hf_transfer-0.1.8-cp310-none-win_amd64.whl", hash = "sha256:2b4f1a9446ba31170b5b1eca4e916504d18378a6b5fe959896bdac8a736a5ecb", size = 1209808 }, + { url = "https://files.pythonhosted.org/packages/72/94/d1c3d383536051f61a5d1d50bbc848a5c165d67d94bde0286ea343d5e00a/hf_transfer-0.1.8-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:e27c15fcc5869ad7e52bbc0bdec6106b288d1c463f8d2da92f28615a3b181361", size = 
1422132 }, + { url = "https://files.pythonhosted.org/packages/a0/a0/d10411151752499381052dbaf99fcbaefa8aaa3b5912b0535eea92d4699c/hf_transfer-0.1.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:871a0032d011ebc6409a73a8406b98b84ff2cd3ed7d9e1af8cdf4d660b9fab9b", size = 1405922 }, + { url = "https://files.pythonhosted.org/packages/85/df/70543e805988b8a1085830e7f5ca290cc7a72c869b4ac2be1a4b619435aa/hf_transfer-0.1.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:686fa756e1e0214bb6327d33c66732c52274d94a8460beb50604ad988b391cf6", size = 3780881 }, + { url = "https://files.pythonhosted.org/packages/93/c9/6920e63df88b2acaa3a4b0b616edca476ef8525d38d6f71437c0c9992b5d/hf_transfer-0.1.8-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:36a03b1b2911b0cf15b1b9d971a34b32dadcc4f2fd979aaff5979d6ce4017c34", size = 3099659 }, + { url = "https://files.pythonhosted.org/packages/7d/b0/f2a85771491de8f887e71ba8769d9fa15c53cadf4c0959954735f5f6e71b/hf_transfer-0.1.8-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:079db90c81f41f4cf3227dfaaa855a9b8e9aef45bc7c2be29ce7232cd83ff881", size = 3588878 }, + { url = "https://files.pythonhosted.org/packages/d8/36/cf7bd093988bdb530abbbfddd4cac80e3ccee4d80454af24fc0913bf2033/hf_transfer-0.1.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac08a4524127fdd14c234d4bcbe49d1c498acf5335c781714823179bcc8dc039", size = 3409342 }, + { url = "https://files.pythonhosted.org/packages/30/61/b38643f305e1f0f76c8894cec38d5d39d0d6265a75cc9de0a94917ddff3d/hf_transfer-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:837432e73cb17274a6782b6216e8ce058aa325a475dc44a5a6a753d48b86d18a", size = 3562382 }, + { url = "https://files.pythonhosted.org/packages/cd/66/723bc1eeca445a1ce5cf72026f45f8a7ae656a1e47fce026cca92e31dbd5/hf_transfer-0.1.8-cp311-none-win32.whl", hash = "sha256:b180f9823dde35aba9bc0f1d0c04ac8a873baebd3732a7ffe4f11940abc7df0d", size = 1129916 }, + { url = "https://files.pythonhosted.org/packages/dd/7e/139527d276416bdeb08546cdcbd6f3e02326f3a6a6c2f00c71300a709e71/hf_transfer-0.1.8-cp311-none-win_amd64.whl", hash = "sha256:37907d2135cebcf8b6d419bb575148d89c224f16b69357f027bd29d0e85c6529", size = 1209794 }, + { url = "https://files.pythonhosted.org/packages/5b/d6/54c9ea16c782cb79cdae78500c0a4bc7474236f94537ee954771e6e86c8c/hf_transfer-0.1.8-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:baf948f4f493949309cbe60529620b9b0aef854a22b6e526753364acc57c09b6", size = 1424195 }, + { url = "https://files.pythonhosted.org/packages/63/57/09e2aa7fa63bc640d9c3fda2cc724744b46227d239bb4ae9bf33efc338c2/hf_transfer-0.1.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bce5c8bdefa478c5d5eaa646cc4ce1df5cfe764d98572ad0c6b8773e98d49f6", size = 1408105 }, + { url = "https://files.pythonhosted.org/packages/19/72/f247f9632410d8b9655332b2007924557c293094ea91648336f49403afe7/hf_transfer-0.1.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54d6f8a1a86128d651a3799e1267c343d60f81f2c565d7c5416eb8e674e4cf0e", size = 3782066 }, + { url = "https://files.pythonhosted.org/packages/d0/cf/8eccb6fcff8eedd79334ffaf65c44109e8bece1ecc232c1036de697d51fa/hf_transfer-0.1.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f79fd1b0c2ed93efb4c5f684118d7a762ecdd218e170df8208c4e13d3dcd4959", size = 3103992 }, + { url = 
"https://files.pythonhosted.org/packages/23/e8/f5d4ef6febc9ece1099e1f8de64f05f4d9f5b62461c4e54aac324a94d1ab/hf_transfer-0.1.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:414df35692670683bf5623498ef9d88a8df5d77e9516515da6e2b34d1054c11f", size = 3590083 }, + { url = "https://files.pythonhosted.org/packages/aa/de/cd8b36ecfd1c40119f307cb0dfd4ca5cd437beb8c92219d52a4253e0059a/hf_transfer-0.1.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c9798d5f951f66b96d40a7a53910260cb5874fda56cf5944dddb7c571f37ec3", size = 3406261 }, + { url = "https://files.pythonhosted.org/packages/37/7f/914b684779dae9d2db4cdb6efa50426da7411754d820b8ddc9c10eef5042/hf_transfer-0.1.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:060c661691f85a61392e57579c80eb64b5ee277434e81fb582f605c1c8ff05d5", size = 3560705 }, + { url = "https://files.pythonhosted.org/packages/de/17/e9ff11be0ab52d113091462f65fa280bd5c04c80e5b1dadb7f8de9645848/hf_transfer-0.1.8-cp312-none-win32.whl", hash = "sha256:f7840e32379820c3e1571a480238e05ea043e970c99d2e999578004a2eb17788", size = 1130448 }, + { url = "https://files.pythonhosted.org/packages/58/60/04c18bbeb46cc2dc6fd237323c03f2e4c700bca122f28567dbb344ff5bab/hf_transfer-0.1.8-cp312-none-win_amd64.whl", hash = "sha256:9a3204ec423cc5e659872e8179f8704ad9ce2abb1e6a991f8838aedf1dc07830", size = 1206317 }, + { url = "https://files.pythonhosted.org/packages/ae/e1/647dbd310042c11638ef330060777084f3394a82adc8274624b0f0601198/hf_transfer-0.1.8-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:928ff036c3e98e10dcfbdb4fcdfc4592d37a5cc8e365a7ba8dfd4337e849d675", size = 3591149 }, + { url = "https://files.pythonhosted.org/packages/13/c4/aaf060b26e720a7b4cb90d7f02dc18a56b18894cbd72fb610f75b11fb9dc/hf_transfer-0.1.8-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d49ba3ce67035f460ae1924fe2feafec155cb535eec7f31ed5109c19064cd294", size = 3564510 }, +] + [[package]] name = "httpcore" version = "1.0.6" @@ -1466,10 +1503,12 @@ name = "mnemonic-gen" version = "0.2.0" source = { virtual = "." 
} dependencies = [ + { name = "accelerate" }, { name = "datasets" }, { name = "evaluate" }, { name = "gradio", version = "4.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, { name = "gradio", version = "5.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + { name = "hf-transfer" }, { name = "numpy" }, { name = "openai" }, { name = "peft" }, @@ -1477,6 +1516,7 @@ dependencies = [ { name = "python-dotenv" }, { name = "pyyaml" }, { name = "ruff" }, + { name = "spaces" }, { name = "tenacity" }, { name = "torch" }, { name = "tqdm" }, @@ -1495,9 +1535,11 @@ dev = [ [package.metadata] requires-dist = [ + { name = "accelerate", specifier = ">=1.0.1" }, { name = "datasets" }, { name = "evaluate" }, { name = "gradio", specifier = ">=4.26.0" }, + { name = "hf-transfer", specifier = ">=0.1.8" }, { name = "numpy", specifier = "<2.0.0" }, { name = "openai", specifier = ">=1.57.0" }, { name = "peft" }, @@ -1505,6 +1547,7 @@ requires-dist = [ { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "pyyaml", specifier = ">=6.0.2" }, { name = "ruff", specifier = ">=0.7.1" }, + { name = "spaces", specifier = ">=0.31.0" }, { name = "tenacity", specifier = ">=9.0.0" }, { name = "torch", specifier = ">=2.5.1" }, { name = "tqdm", specifier = ">=4.67.1" }, @@ -2943,6 +2986,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, ] +[[package]] +name = "spaces" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gradio", version = "4.26.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, + { name = "gradio", version = "5.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/57/73a23d582c7ba2f53a9394d7efa482144f026f549035ba2b70e3cb085e47/spaces-0.31.0.tar.gz", hash = "sha256:28c8ceee2437231e9279eedc057a13870432903c3ee7fcfb57b636bf34db2278", size = 21759 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/b4/9f3c2ba72d25bd54aa2f41d13ff840961d1c7cd3ab8aa3b82be933e1a87d/spaces-0.31.0-py3-none-any.whl", hash = "sha256:e0acf655d3a0209a3cec73b83ef1510a0b10b682cfb0b720a68d3ca1bb0f84e2", size = 28415 }, +] + [[package]] name = "stack-data" version = "0.6.3"
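One note on the new `hf-transfer` dependency locked above: it is opt-in, and `huggingface_hub` only uses the Rust transfer backend when the corresponding environment variable is set before any download starts. A minimal sketch, using the `HF_MODEL_NAME` repo from `src/utils/constants.py`:

```python
# Hedged sketch: enable hf_transfer before huggingface_hub performs any
# transfer; without this environment variable the dependency is unused.
import os

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import snapshot_download

# HF_MODEL_NAME from src/utils/constants.py
snapshot_download(repo_id="chiffonng/gemma2-9b-it-mnemonics")
```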