diff --git a/01-models/Qwen3/Qwen3-VL/Qwen3-VL-2B-Instruct.ipynb b/01-models/Qwen3/Qwen3-VL/Qwen3-VL-2B-Instruct.ipynb
new file mode 100644
index 0000000..4efddd3
--- /dev/null
+++ b/01-models/Qwen3/Qwen3-VL/Qwen3-VL-2B-Instruct.ipynb
@@ -0,0 +1,575 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# Deploy Qwen3-VL-2B-Instruct for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the Qwen3-VL-2B-Instruct model (HuggingFace model ID: [Qwen/Qwen3-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct)) using Amazon SageMaker AI. \n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -Uq huggingface_hub sagemaker transformers==4.57.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:19:08.231499Z",
+     "iopub.status.busy": "2025-10-28T19:19:08.231306Z",
+     "iopub.status.idle": "2025-10-28T19:19:09.646052Z",
+     "shell.execute_reply": "2025-10-28T19:19:09.645642Z",
+     "shell.execute_reply.started": "2025-10-28T19:19:08.231484Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n",
+      "sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml\n",
+      "2.253.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import datetime\n",
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import json\n",
+    "import time\n",
+    "import shutil\n",
+    "import tarfile\n",
+    "\n",
+    "import sagemaker\n",
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "from huggingface_hub import snapshot_download\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:19:09.646609Z",
+     "iopub.status.busy": "2025-10-28T19:19:09.646472Z",
+     "iopub.status.idle": "2025-10-28T19:19:10.114037Z",
+     "shell.execute_reply": "2025-10-28T19:19:10.113669Z",
+     "shell.execute_reply.started": "2025-10-28T19:19:09.646594Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Qwen-Qwen3-VL-2B-Instruct-endpoint-1761679149-892584\n",
+      "Saving model artifacts to sagemaker-us-east-1-329542461890/models/Qwen_Qwen3-VL-2B-Instruct\n"
+     ]
+    }
+   ],
+   "source": [
+    "session = sagemaker.Session()\n",
+    "role = sagemaker.get_execution_role()\n",
+    "\n",
+    "instance_type = \"ml.g5.4xlarge\"\n",
+    "instance_count = 1\n",
+    "\n",
+    "model_id = \"Qwen/Qwen3-VL-2B-Instruct\"\n",
+    "model_id_filesafe = model_id.replace(\"/\", \"_\").replace(\".\", \"_\")\n",
+    "\n",
+    "# Assemble the endpoint name from pre-computed parts: reusing double quotes inside a\n",
+    "# double-quoted f-string is a SyntaxError on kernels older than Python 3.12.\n",
+    "endpoint_prefix = model_id_filesafe.replace(\"_\", \"-\")\n",
+    "endpoint_suffix = str(datetime.datetime.now().timestamp()).replace(\".\", \"-\")\n",
+    "endpoint_name = f\"{endpoint_prefix}-endpoint-{endpoint_suffix}\"\n",
+    "print(endpoint_name)\n",
+    "\n",
+    "# Container image referenced by the DJL Serving deployment variant further down.\n",
+    "image_uri = \"763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128-v1.2\"\n",
+    "\n",
+    "base_name = model_id.split('/')[-1].replace('.', '-').lower()\n",
+    "model_lineage = model_id.split('/')[0]\n",
+    "\n",
+    "# Fall back to a per-model prefix when the session has no default bucket prefix.\n",
+    "bucket_name = session.default_bucket()\n",
+    "default_prefix = session.default_bucket_prefix or f\"models/{model_id_filesafe}\"\n",
+    "print(f\"Saving model artifacts to {bucket_name}/{default_prefix}\")\n",
+    "\n",
+    "# Directory that the %%writefile cells below write requirements.txt and inference.py into.\n",
+    "os.makedirs(\"code\", exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a7d4737-12ca-4cb7-aee4-06f7c0328b6d",
+   "metadata": {},
+   "source": [
+    "## Local Model Test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51db134d-19bb-4702-949e-cf8160fe917f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from transformers import Qwen3VLForConditionalGeneration, AutoProcessor\n",
+    "\n",
+    "# Load the weights and the matching processor using the model ID from the setup cell.\n",
+    "model = Qwen3VLForConditionalGeneration.from_pretrained(\n",
+    "    model_id,\n",
+    "    dtype=torch.float16,\n",
+    "    device_map=\"auto\",\n",
+    "    attn_implementation=\"sdpa\"\n",
+    ")\n",
+    "processor = AutoProcessor.from_pretrained(model_id)\n",
+    "\n",
+    "# A single user turn containing one image and one text instruction.\n",
+    "messages = [\n",
+    "    {\n",
+    "        \"role\":\"user\",\n",
+    "        \"content\":[\n",
+    "            {\n",
+    "                \"type\":\"image\",\n",
+    "                \"url\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\":\"text\",\n",
+    "                \"text\":\"Describe this image.\"\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "inputs = processor.apply_chat_template(\n",
+    "    messages,\n",
+    "    tokenize=True,\n",
+    "    add_generation_prompt=True,\n",
+    "    return_dict=True,\n",
+    "    return_tensors=\"pt\",\n",
+    ")\n",
+    "inputs.pop(\"token_type_ids\", None)\n",
+    "# device_map=\"auto\" may place the model on a GPU; the inputs must live on the same device.\n",
+    "inputs = inputs.to(model.device)\n",
+    "\n",
+    "generated_ids = model.generate(**inputs, max_new_tokens=128)\n",
+    "# Drop the prompt tokens so that only the newly generated text is decoded.\n",
+    "generated_ids_trimmed = [\n",
+    "    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)\n",
+    "]\n",
+    "output_text = processor.batch_decode(\n",
+    "    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False\n",
+    ")\n",
+    "print(output_text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "Here we define the custom requirements and inference logic to be run by this model. We download the model assets from HuggingFace, zip them up and upload them to S3. We then deploy the model as a `HuggingFaceModel`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "410cb9a8-9fd0-408d-bebb-af8b8c09d398",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:19:13.048393Z",
+     "iopub.status.busy": "2025-10-28T19:19:13.048051Z",
+     "iopub.status.idle": "2025-10-28T19:19:13.051031Z",
+     "shell.execute_reply": "2025-10-28T19:19:13.050649Z",
+     "shell.execute_reply.started": "2025-10-28T19:19:13.048378Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Environment variables handed to the serving container.\n",
+    "# Each key appears once: a repeated key in a dict literal silently overrides the earlier one.\n",
+    "env = {\n",
+    "    \"HF_MODEL_ID\": model_id,\n",
+    "    \"HF_TASK\": \"image-text-to-text\",\n",
+    "    \"SM_NUM_GPUS\": json.dumps(1),\n",
+    "    \"OPTION_TRUST_REMOTE_CODE\": \"true\",\n",
+    "    \"OPTION_MODEL_LOADING_TIMEOUT\": \"3600\",\n",
+    "    \"OPTION_ROLLING_BATCH\": \"disable\",\n",
+    "    \"OPTION_TENSOR_PARALLEL_DEGREE\": \"1\",\n",
+    "    \"OPTION_MAX_MODEL_LEN\": \"5000\",\n",
+    "    \"OPTION_ASYNC_MODE\": \"true\",\n",
+    "    \"SERVING_FAIL_FAST\": \"true\",\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "5d441971-621c-4d52-a7db-d67020a8667e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:19:13.916283Z",
+     "iopub.status.busy": "2025-10-28T19:19:13.915921Z",
+     "iopub.status.idle": "2025-10-28T19:19:13.919378Z",
+     "shell.execute_reply": "2025-10-28T19:19:13.919024Z",
+     "shell.execute_reply.started": "2025-10-28T19:19:13.916253Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting code/requirements.txt\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile code/requirements.txt\n",
+    "transformers==4.57.0\n",
+    "torch\n",
+    "torchvision\n",
+    "torchaudio\n",
+    "pillow\n",
+    "requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a6468cba-5d10-484d-898a-19802612925c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:51:30.543812Z",
+     "iopub.status.busy": "2025-10-28T19:51:30.543609Z",
+     "iopub.status.idle": "2025-10-28T19:51:30.546908Z",
+     "shell.execute_reply": "2025-10-28T19:51:30.546525Z",
+     "shell.execute_reply.started": "2025-10-28T19:51:30.543795Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting code/inference.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile code/inference.py\n",
+    "# This code comes from HuggingFace\n",
+    "# https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct\n",
+    "import logging\n",
+    "import torch\n",
+    "from transformers import Qwen3VLForConditionalGeneration, AutoProcessor\n",
+    "\n",
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.INFO)\n",
+    "\n",
+    "# Conversation used when the request does not supply its own \"messages\".\n",
+    "DEFAULT_MESSAGES = [\n",
+    "    {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": [\n",
+    "            {\n",
+    "                \"type\": \"image\",\n",
+    "                \"image\": \"https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg\",\n",
+    "            },\n",
+    "            {\"type\": \"text\", \"text\": \"Describe this image.\"},\n",
+    "        ],\n",
+    "    }\n",
+    "]\n",
+    "DEFAULT_MAX_NEW_TOKENS = 128\n",
+    "\n",
+    "\n",
+    "def model_fn(model_dir):\n",
+    "    \"\"\"Load the model and its processor from ``model_dir``.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict: ``{\"processor\": ..., \"model\": ...}``, passed to ``predict_fn`` as ``model_obj``.\n",
+    "    \"\"\"\n",
+    "    model = Qwen3VLForConditionalGeneration.from_pretrained(\n",
+    "        model_dir,\n",
+    "        dtype=torch.float16,\n",
+    "        device_map=\"auto\",\n",
+    "        attn_implementation=\"sdpa\"\n",
+    "    )\n",
+    "\n",
+    "    processor = AutoProcessor.from_pretrained(\n",
+    "        model_dir,\n",
+    "        trust_remote_code=True\n",
+    "    )\n",
+    "\n",
+    "    return {\"processor\": processor, \"model\": model}\n",
+    "\n",
+    "\n",
+    "def predict_fn(data, model_obj):\n",
+    "    \"\"\"Generate text for a chat-style request and return it to the caller.\n",
+    "\n",
+    "    Args:\n",
+    "        data: Deserialized request payload. When it is a dict, the optional\n",
+    "            \"messages\" key supplies the conversation and the optional\n",
+    "            \"max_new_tokens\" key the generation budget; defaults apply otherwise.\n",
+    "        model_obj: The dict produced by ``model_fn``.\n",
+    "\n",
+    "    Returns:\n",
+    "        list[str]: The decoded generations, one entry per input sequence.\n",
+    "    \"\"\"\n",
+    "    processor = model_obj[\"processor\"]\n",
+    "    model = model_obj[\"model\"]\n",
+    "\n",
+    "    request = data if isinstance(data, dict) else {}\n",
+    "    messages = request.get(\"messages\") or DEFAULT_MESSAGES\n",
+    "    max_new_tokens = int(request.get(\"max_new_tokens\", DEFAULT_MAX_NEW_TOKENS))\n",
+    "\n",
+    "    inputs = processor.apply_chat_template(\n",
+    "        messages,\n",
+    "        tokenize=True,\n",
+    "        add_generation_prompt=True,\n",
+    "        return_dict=True,\n",
+    "        return_tensors=\"pt\"\n",
+    "    )\n",
+    "    inputs = inputs.to(model.device)\n",
+    "    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)\n",
+    "    # Drop the prompt tokens so that only the newly generated text is decoded.\n",
+    "    generated_ids_trimmed = [\n",
+    "        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)\n",
+    "    ]\n",
+    "    output_text = processor.batch_decode(\n",
+    "        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False\n",
+    "    )\n",
+    "    logger.info(output_text)\n",
+    "    # Return the result: a handler that only prints leaves the response empty.\n",
+    "    return output_text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "3ba9db6b-3a56-462a-a513-770ed7f4bbcc",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:51:31.602686Z",
+     "iopub.status.busy": "2025-10-28T19:51:31.602482Z",
+     "iopub.status.idle": "2025-10-28T19:51:31.605398Z",
+     "shell.execute_reply": "2025-10-28T19:51:31.604909Z",
+     "shell.execute_reply.started": "2025-10-28T19:51:31.602672Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def filter_function(tarinfo):\n",
+    "    \"\"\"Tar filter: skip members whose name contains '.cache' or '.gitattributes'.\n",
+    "\n",
+    "    Returns None for a skipped member and the unchanged TarInfo otherwise.\n",
+    "    \"\"\"\n",
+    "    excluded_markers = ('.cache', '.gitattributes')\n",
+    "    is_excluded = any(marker in tarinfo.name for marker in excluded_markers)\n",
+    "    return None if is_excluded else tarinfo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "ae9b8cdf-ebf9-4d05-837d-8a3f412b73d4",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:51:33.065578Z",
+     "iopub.status.busy": "2025-10-28T19:51:33.065191Z",
+     "iopub.status.idle": "2025-10-28T19:57:37.284971Z",
+     "shell.execute_reply": "2025-10-28T19:57:37.284528Z",
+     "shell.execute_reply.started": "2025-10-28T19:51:33.065543Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7dc899529b1b47008ef16bc6303b5b18",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully downloaded to /home/sagemaker-user/sagemaker-genai-hosting-examples/01-models/Qwen3/Qwen3-VL/model\n",
+      "Building gzipped tarball...\n",
+      "Successfully tarred the ball.\n",
+      "Uploading tarball to sagemaker-us-east-1-329542461890/models/Qwen_Qwen3-VL-2B-Instruct...\n",
+      "Successfully uploaded, working directory cleaned\n"
+     ]
+    }
+   ],
+   "source": [
+    "from botocore.exceptions import ClientError\n",
+    "\n",
+    "s3_client = boto3.client('s3')\n",
+    "key = f\"{default_prefix}/model.tar.gz\"\n",
+    "force_rebuild_tarball = True\n",
+    "\n",
+    "\n",
+    "def s3_object_exists(client, bucket, object_key):\n",
+    "    \"\"\"Return True if the object exists and False if S3 reports it missing.\n",
+    "\n",
+    "    head_object raises ClientError for a missing key instead of returning a falsy\n",
+    "    value, so the error has to be caught. Any other S3 error is re-raised.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        client.head_object(Bucket=bucket, Key=object_key)\n",
+    "    except ClientError as err:\n",
+    "        if err.response[\"Error\"][\"Code\"] in (\"404\", \"NoSuchKey\", \"NotFound\"):\n",
+    "            return False\n",
+    "        raise\n",
+    "    return True\n",
+    "\n",
+    "\n",
+    "if force_rebuild_tarball or not s3_object_exists(s3_client, bucket_name, key):\n",
+    "    try:\n",
+    "        model_path = snapshot_download(repo_id=model_id, local_dir=\"./model\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Failed to download after retries: {str(e)}\")\n",
+    "        # Stop here: without the snapshot there is nothing to package.\n",
+    "        raise\n",
+    "    print(f\"Successfully downloaded to {model_path}\")\n",
+    "\n",
+    "    print(\"Building gzipped tarball...\")\n",
+    "    with tarfile.open(\"./model.tar.gz\", \"w:gz\") as tar:\n",
+    "        # Model files are added at the archive root, the inference code under code/.\n",
+    "        tar.add(model_path, arcname=\".\", filter=filter_function)\n",
+    "        tar.add(\"./code\", filter=filter_function)\n",
+    "    print(\"Successfully tarred the ball.\")\n",
+    "\n",
+    "    print(f\"Uploading tarball to {bucket_name}/{default_prefix}...\")\n",
+    "    s3_client.upload_file(\"./model.tar.gz\", bucket_name, key)\n",
+    "    # Remove the local copies so that the message below is accurate.\n",
+    "    shutil.rmtree(\"./model\")\n",
+    "    os.remove(\"./model.tar.gz\")\n",
+    "    print(\"Successfully uploaded, working directory cleaned\")\n",
+    "else:\n",
+    "    print(f\"Reusing existing tarball at s3://{bucket_name}/{key}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (G5 instance)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.g5.4xlarge` for high-performance inference\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7207d0d4-f2be-41c6-a989-3db36da256cf",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T19:57:37.288133Z",
+     "iopub.status.busy": "2025-10-28T19:57:37.287995Z",
+     "iopub.status.idle": "2025-10-28T20:10:17.947197Z",
+     "shell.execute_reply": "2025-10-28T20:10:17.946723Z",
+     "shell.execute_reply.started": "2025-10-28T19:57:37.288117Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-----------!"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the Hugging Face model; the container environment comes from `env` defined above.\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=env,\n",
+    "    role=role,\n",
+    "    entry_point=\"inference.py\",\n",
+    "    enable_network_isolation=False\n",
+    ")\n",
+    "\n",
+    "# Deploy to a real-time endpoint using the instance settings from the setup cell.\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    initial_instance_count=instance_count,\n",
+    "    instance_type=instance_type\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8ef9603-a16d-4195-9702-b920762e2e9b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Using DJL Serving\n",
+    "# UNDER CONSTRUCTION\n",
+    "\n",
+    "# model = HuggingFaceModel(\n",
+    "#     model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "#     image_uri=image_uri,\n",
+    "#     env=env,\n",
+    "#     role=role,\n",
+    "#     entry_point=\"inference.py\",\n",
+    "#     enable_network_isolation=False\n",
+    "# )\n",
+    "\n",
+    "# predictor = model.deploy(\n",
+    "#     initial_instance_count=instance_count,\n",
+    "#     instance_type=instance_type,\n",
+    "#     endpoint_name=endpoint_name\n",
+    "# )\n",
+    "\n",
+    "# predictor.predict()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "# Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Delete the endpoint as well as the model: a running endpoint keeps incurring charges.\n",
+    "huggingface_model.delete_model()\n",
+    "predictor.delete_endpoint()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/01-models/google/Owl/Owlv2-base-patch16.ipynb b/01-models/google/Owl/Owlv2-base-patch16.ipynb
new file mode 100644
index 0000000..8adbe11
--- /dev/null
+++ b/01-models/google/Owl/Owlv2-base-patch16.ipynb
@@ -0,0 +1,464 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# Deploy Owlv2-base-patch16 for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the OWLv2 base (patch 16) open-vocabulary object detection model (HuggingFace model ID: [google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16)) using Amazon SageMaker AI. \n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -Uq huggingface_hub sagemaker transformers==4.57.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import datetime\n",
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import json\n",
+    "import time\n",
+    "import shutil\n",
+    "import tarfile\n",
+    "\n",
+    "import sagemaker\n",
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "from huggingface_hub import snapshot_download\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "session = sagemaker.Session()\n",
+    "role = sagemaker.get_execution_role()\n",
+    "\n",
+    "instance_type = \"ml.g5.4xlarge\"\n",
+    "instance_count = 1\n",
+    "\n",
+    "model_id = \"google/owlv2-base-patch16\"\n",
+    "model_id_filesafe = model_id.replace(\"/\", \"_\").replace(\".\", \"_\")\n",
+    "\n",
+    "# Assemble the endpoint name from pre-computed parts: reusing double quotes inside a\n",
+    "# double-quoted f-string is a SyntaxError on kernels older than Python 3.12.\n",
+    "endpoint_prefix = model_id_filesafe.replace(\"_\", \"-\")\n",
+    "endpoint_suffix = str(datetime.datetime.now().timestamp()).replace(\".\", \"-\")\n",
+    "endpoint_name = f\"{endpoint_prefix}-endpoint-{endpoint_suffix}\"\n",
+    "print(endpoint_name)\n",
+    "\n",
+    "base_name = model_id.split('/')[-1].replace('.', '-').lower()\n",
+    "model_lineage = model_id.split('/')[0]\n",
+    "\n",
+    "# Fall back to a per-model prefix when the session has no default bucket prefix.\n",
+    "bucket_name = session.default_bucket()\n",
+    "default_prefix = session.default_bucket_prefix or f\"models/{model_id_filesafe}\"\n",
+    "print(f\"Saving model artifacts to {bucket_name}/{default_prefix}\")\n",
+    "\n",
+    "# Directory that the %%writefile cells below write requirements.txt and inference.py into.\n",
+    "os.makedirs(\"code\", exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a7d4737-12ca-4cb7-aee4-06f7c0328b6d",
+   "metadata": {},
+   "source": [
+    "## Local Model Test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51db134d-19bb-4702-949e-cf8160fe917f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This code is adapted from https://huggingface.co/google/owlv2-base-patch16\n",
+    "\n",
+    "import requests\n",
+    "from PIL import Image\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from transformers import AutoProcessor, Owlv2ForObjectDetection\n",
+    "from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD\n",
+    "\n",
+    "# Load the processor and the weights using the model ID from the setup cell.\n",
+    "processor = AutoProcessor.from_pretrained(model_id)\n",
+    "model = Owlv2ForObjectDetection.from_pretrained(model_id)\n",
+    "\n",
+    "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n",
+    "# Fail fast on a hung or failed download instead of handing an error page to PIL.\n",
+    "response = requests.get(url, stream=True, timeout=30)\n",
+    "response.raise_for_status()\n",
+    "image = Image.open(response.raw)\n",
+    "texts = [[\"a photo of a cat\", \"a photo of a dog\"]]\n",
+    "inputs = processor(text=texts, images=image, return_tensors=\"pt\")\n",
+    "\n",
+    "# forward pass\n",
+    "with torch.no_grad():\n",
+    "    outputs = model(**inputs)\n",
+    "\n",
+    "# Note: boxes need to be visualized on the padded, unnormalized image\n",
+    "# hence we'll set the target image sizes (height, width) based on that\n",
+    "\n",
+    "def get_preprocessed_image(pixel_values):\n",
+    "    \"\"\"Undo the CLIP mean/std normalization and return the result as a PIL image.\"\"\"\n",
+    "    pixel_values = pixel_values.squeeze().numpy()\n",
+    "    unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]\n",
+    "    unnormalized_image = (unnormalized_image * 255).astype(np.uint8)\n",
+    "    unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)\n",
+    "    unnormalized_image = Image.fromarray(unnormalized_image)\n",
+    "    return unnormalized_image\n",
+    "\n",
+    "unnormalized_image = get_preprocessed_image(inputs.pixel_values)\n",
+    "\n",
+    "# PIL reports (width, height); reverse it to the (height, width) order used for target sizes.\n",
+    "target_sizes = torch.Tensor([unnormalized_image.size[::-1]])\n",
+    "# Convert outputs (bounding boxes and class logits) to final bounding boxes and scores\n",
+    "results = processor.post_process_object_detection(\n",
+    "    outputs=outputs, threshold=0.2, target_sizes=target_sizes\n",
+    ")\n",
+    "\n",
+    "i = 0  # Retrieve predictions for the first image for the corresponding text queries\n",
+    "text = texts[i]\n",
+    "boxes, scores, labels = results[i][\"boxes\"], results[i][\"scores\"], results[i][\"labels\"]\n",
+    "\n",
+    "for box, score, label in zip(boxes, scores, labels):\n",
+    "    box = [round(coord, 2) for coord in box.tolist()]\n",
+    "    print(f\"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "Here we define the custom requirements and inference logic to be run by this model. We download the model assets from HuggingFace, zip them up and upload them to S3. We then deploy the model as a `HuggingFaceModel`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "410cb9a8-9fd0-408d-bebb-af8b8c09d398",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Environment variables handed to the serving container.\n",
+    "# HF_TASK names the task this detection model serves; each key appears only once.\n",
+    "env = {\n",
+    "    \"HF_MODEL_ID\": model_id,\n",
+    "    \"HF_TASK\": \"zero-shot-object-detection\",\n",
+    "    \"SM_NUM_GPUS\": json.dumps(1),\n",
+    "    \"OPTION_TRUST_REMOTE_CODE\": \"true\",\n",
+    "    \"OPTION_MODEL_LOADING_TIMEOUT\": \"3600\",\n",
+    "    \"OPTION_ROLLING_BATCH\": \"disable\",\n",
+    "    \"OPTION_TENSOR_PARALLEL_DEGREE\": \"1\",\n",
+    "    \"OPTION_MAX_MODEL_LEN\": \"5000\",\n",
+    "    \"OPTION_ASYNC_MODE\": \"true\",\n",
+    "    \"SERVING_FAIL_FAST\": \"true\",\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d441971-621c-4d52-a7db-d67020a8667e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile code/requirements.txt\n",
+    "transformers==4.57.0\n",
+    "torch\n",
+    "torchvision\n",
+    "torchaudio\n",
+    "pillow\n",
+    "requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6468cba-5d10-484d-898a-19802612925c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile code/inference.py\n",
+    "# This code comes from HuggingFace\n",
+    "# https://huggingface.co/google/owlv2-base-patch16\n",
+    "\n",
+    "import logging\n",
+    "import requests\n",
+    "from PIL import Image\n",
+    "import torch\n",
+    "from transformers import AutoProcessor, Owlv2ForObjectDetection\n",
+    "\n",
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.INFO)\n",
+    "\n",
+    "# Values used when the request does not override them.\n",
+    "DEFAULT_IMAGE_URL = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n",
+    "DEFAULT_QUERIES = [\"a photo of a cat\", \"a photo of a dog\"]\n",
+    "DEFAULT_THRESHOLD = 0.2\n",
+    "\n",
+    "\n",
+    "def model_fn(model_dir):\n",
+    "    \"\"\"Load the model and its processor from ``model_dir``.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict: ``{\"processor\": ..., \"model\": ...}``, passed to ``predict_fn`` as ``model_obj``.\n",
+    "    \"\"\"\n",
+    "    model = Owlv2ForObjectDetection.from_pretrained(\n",
+    "        model_dir,\n",
+    "        device_map=\"auto\"\n",
+    "    )\n",
+    "\n",
+    "    processor = AutoProcessor.from_pretrained(\n",
+    "        model_dir,\n",
+    "        trust_remote_code=True\n",
+    "    )\n",
+    "\n",
+    "    return {\"processor\": processor, \"model\": model}\n",
+    "\n",
+    "\n",
+    "def predict_fn(data, model_obj):\n",
+    "    \"\"\"Detect the queried objects in an image and return the detections.\n",
+    "\n",
+    "    Args:\n",
+    "        data: Deserialized request payload. When it is a dict, the optional keys\n",
+    "            \"image_url\", \"queries\" (list of text prompts) and \"threshold\"\n",
+    "            override the defaults defined at the top of this file.\n",
+    "        model_obj: The dict produced by ``model_fn``.\n",
+    "\n",
+    "    Returns:\n",
+    "        list[dict]: One entry per detection with the keys \"label\", \"score\" and \"box\".\n",
+    "    \"\"\"\n",
+    "    processor = model_obj[\"processor\"]\n",
+    "    model = model_obj[\"model\"]\n",
+    "\n",
+    "    request = data if isinstance(data, dict) else {}\n",
+    "    # NOTE: the endpoint downloads this URL itself; restrict it if callers are untrusted.\n",
+    "    url = request.get(\"image_url\") or DEFAULT_IMAGE_URL\n",
+    "    queries = request.get(\"queries\") or DEFAULT_QUERIES\n",
+    "    threshold = float(request.get(\"threshold\", DEFAULT_THRESHOLD))\n",
+    "\n",
+    "    response = requests.get(url, stream=True, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    image = Image.open(response.raw)\n",
+    "    texts = [queries]\n",
+    "    # device_map=\"auto\" may place the model on a GPU; keep the inputs on the same device.\n",
+    "    inputs = processor(text=texts, images=image, return_tensors=\"pt\").to(model.device)\n",
+    "\n",
+    "    # forward pass\n",
+    "    with torch.no_grad():\n",
+    "        outputs = model(**inputs)\n",
+    "\n",
+    "    # Boxes are expressed relative to the padded image the model saw, so the target\n",
+    "    # size is the (height, width) of the preprocessed pixel tensor.\n",
+    "    target_sizes = torch.Tensor([list(inputs.pixel_values.shape[-2:])])\n",
+    "    # Convert outputs (bounding boxes and class logits) to final bounding boxes and scores\n",
+    "    results = processor.post_process_object_detection(\n",
+    "        outputs=outputs, threshold=threshold, target_sizes=target_sizes\n",
+    "    )\n",
+    "\n",
+    "    first_image = results[0]  # only one image is submitted per request\n",
+    "    detections = []\n",
+    "    for box, score, label in zip(first_image[\"boxes\"], first_image[\"scores\"], first_image[\"labels\"]):\n",
+    "        detection = {\n",
+    "            \"label\": queries[int(label)],\n",
+    "            \"score\": round(score.item(), 3),\n",
+    "            \"box\": [round(coord, 2) for coord in box.tolist()],\n",
+    "        }\n",
+    "        logger.info(\"Detected %(label)s with confidence %(score)s at location %(box)s\", detection)\n",
+    "        detections.append(detection)\n",
+    "    # Return the result: a handler that only prints leaves the response empty.\n",
+    "    return detections"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ba9db6b-3a56-462a-a513-770ed7f4bbcc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def filter_function(tarinfo):\n",
+    "    \"\"\"Tar filter: skip members whose name contains '.cache' or '.gitattributes'.\n",
+    "\n",
+    "    Returns None for a skipped member and the unchanged TarInfo otherwise.\n",
+    "    \"\"\"\n",
+    "    excluded_markers = ('.cache', '.gitattributes')\n",
+    "    is_excluded = any(marker in tarinfo.name for marker in excluded_markers)\n",
+    "    return None if is_excluded else tarinfo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ae9b8cdf-ebf9-4d05-837d-8a3f412b73d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from botocore.exceptions import ClientError\n",
+    "\n",
+    "s3_client = boto3.client('s3')\n",
+    "key = f\"{default_prefix}/model.tar.gz\"\n",
+    "force_rebuild_tarball = True\n",
+    "\n",
+    "\n",
+    "def s3_object_exists(client, bucket, object_key):\n",
+    "    \"\"\"Return True if the object exists and False if S3 reports it missing.\n",
+    "\n",
+    "    head_object raises ClientError for a missing key instead of returning a falsy\n",
+    "    value, so the error has to be caught. Any other S3 error is re-raised.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        client.head_object(Bucket=bucket, Key=object_key)\n",
+    "    except ClientError as err:\n",
+    "        if err.response[\"Error\"][\"Code\"] in (\"404\", \"NoSuchKey\", \"NotFound\"):\n",
+    "            return False\n",
+    "        raise\n",
+    "    return True\n",
+    "\n",
+    "\n",
+    "if force_rebuild_tarball or not s3_object_exists(s3_client, bucket_name, key):\n",
+    "    try:\n",
+    "        model_path = snapshot_download(repo_id=model_id, local_dir=\"./model\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Failed to download after retries: {str(e)}\")\n",
+    "        # Stop here: without the snapshot there is nothing to package.\n",
+    "        raise\n",
+    "    print(f\"Successfully downloaded to {model_path}\")\n",
+    "\n",
+    "    print(\"Building gzipped tarball...\")\n",
+    "    with tarfile.open(\"./model.tar.gz\", \"w:gz\") as tar:\n",
+    "        # Model files are added at the archive root, the inference code under code/.\n",
+    "        tar.add(model_path, arcname=\".\", filter=filter_function)\n",
+    "        tar.add(\"./code\", filter=filter_function)\n",
+    "    print(\"Successfully tarred the ball.\")\n",
+    "\n",
+    "    print(f\"Uploading tarball to {bucket_name}/{default_prefix}...\")\n",
+    "    s3_client.upload_file(\"./model.tar.gz\", bucket_name, key)\n",
+    "    shutil.rmtree(\"./model\")\n",
+    "    os.remove(\"./model.tar.gz\")\n",
+    "    print(\"Successfully uploaded, working directory cleaned\")\n",
+    "else:\n",
+    "    print(f\"Reusing existing tarball at s3://{bucket_name}/{key}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (an `ml.m5.xlarge` instance in the cell below)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.m5.xlarge`, as set in the deployment cell below (a GPU instance such as `ml.g5.4xlarge` can be substituted for faster inference)\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7cbeaf71-b51c-4b65-8196-ba0f403eb2a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the Hugging Face model from the tarball uploaded to S3 above.\n",
+    "# The container is configured through `env`; model files come from model_data.\n",
+    "# NOTE(review): entry_point is resolved relative to the working directory unless\n",
+    "# source_dir is set - confirm inference.py is reachable from here.\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=env,\n",
+    "    role=role,\n",
+    "    entry_point=\"inference.py\",\n",
+    "    enable_network_isolation=False\n",
+    ")\n",
+    "\n",
+    "# deploy model to SageMaker Inference\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    initial_instance_count=1, # number of instances\n",
+    "    instance_type='ml.m5.xlarge' # ec2 instance type\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8ef9603-a16d-4195-9702-b920762e2e9b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Using DJL Serving\n",
+    "# UNDER CONSTRUCTION\n",
+    "\n",
+    "# image_uri = \"763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128-v1.2\"\n",
+    "\n",
+    "# model = HuggingFaceModel(\n",
+    "#     model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "#     image_uri=image_uri,\n",
+    "#     env=env,\n",
+    "#     role=role,\n",
+    "#     entry_point=\"inference.py\",\n",
+    "#     enable_network_isolation=False\n",
+    "# )\n",
+    "\n",
+    "# predictor = model.deploy(\n",
+    "#     initial_instance_count=instance_count,\n",
+    "#     instance_type=instance_type,\n",
+    "#     endpoint_name=endpoint_name\n",
+    "# )\n",
+    "\n",
+    "# predictor.predict()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "# Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tear down the endpoint together with its endpoint config, then the model.\n",
+    "predictor.delete_endpoint(delete_endpoint_config=True)\n",
+    "huggingface_model.delete_model()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac15a012-fa80-43d0-a8d2-cdfbf2082e2e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/01-models/google/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb b/01-models/google/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb
new file mode 100644
index 0000000..cd7a1e9
--- /dev/null
+++ b/01-models/google/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# How to deploy VaultGemma 1B for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the Vault Gemma 1B model (HuggingFace model ID: [google/vaultgemma-1b](https://huggingface.co/google/vaultgemma-1b)) using Amazon SageMaker AI. \n",
+    "\n",
+    "VaultGemma is a variant of the Gemma family of lightweight, state-of-the-art open models from Google. It is pre-trained from the ground up using Differential Privacy (DP). This provides strong, mathematically-backed privacy guarantees for its training data, limiting the extent to which the model's outputs can reveal information about any single training example.\n",
+    "\n",
+    "VaultGemma uses a similar architecture as Gemma 2. VaultGemma is a pretrained model that can be instruction tuned for a variety of language understanding and generation tasks. Its relatively small size (< 1B parameters) makes it possible to deploy in environments with limited resources, democratizing access to state-of-the-art AI models that are built with privacy at their core.\n",
+    "\n",
+    "### License agreement\n",
+    "* This model is gated on HuggingFace, please refer to the original [model card](https://huggingface.co/google/vaultgemma-1b) for license.\n",
+    "* This notebook is a sample notebook and not intended for production use.\n",
+    "\n",
+    "### Execution environment setup\n",
+    "This notebook requires the following third-party Python dependencies:\n",
+    "* AWS [`sagemaker`](https://sagemaker.readthedocs.io/en/stable/index.html) with a version greater than or equal to 2.242.0\n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:31.124480Z",
+     "iopub.status.busy": "2025-09-15T19:04:31.124212Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.189319Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.188352Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:31.124456Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "autogluon-multimodal 1.4.0 requires nvidia-ml-py3<8.0,>=7.352.0, which is not installed.\n",
+      "aiobotocore 2.21.1 requires botocore<1.37.2,>=1.37.0, but you have botocore 1.40.30 which is incompatible.\n",
+      "autogluon-multimodal 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "autogluon-timeseries 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "sagemaker-studio-analytics-extension 0.2.0 requires sparkmagic==0.22.0, but you have sparkmagic 0.21.0 which is incompatible.\n",
+      "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -Uq sagemaker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.191175Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.190795Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.196080Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.195223Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.191132Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.245.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import time\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.197423Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.197103Z",
+     "iopub.status.idle": "2025-09-15T19:04:38.010473Z",
+     "shell.execute_reply": "2025-09-15T19:04:38.009714Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.197392Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Resolve the IAM role used to create the SageMaker resources below.\n",
+    "try:\n",
+    "    role = sagemaker.get_execution_role()\n",
+    "    sagemaker_session  = sagemaker.Session()\n",
+    "    \n",
+    "except ValueError:\n",
+    "    # Fallback when get_execution_role() raises ValueError: look the role up by its\n",
+    "    # hard-coded name. NOTE(review): sagemaker_session is not defined on this path.\n",
+    "    iam = boto3.client('iam')\n",
+    "    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7e6b15cd-34e5-45f4-a68a-728aa9cc930d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:09:45.382881Z",
+     "iopub.status.busy": "2025-09-15T19:09:45.382602Z",
+     "iopub.status.idle": "2025-09-15T19:09:45.388802Z",
+     "shell.execute_reply": "2025-09-15T19:09:45.387863Z",
+     "shell.execute_reply.started": "2025-09-15T19:09:45.382860Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'vaultgemma-1b'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Hub model to deploy and the access token placeholder to replace before running.\n",
+    "HF_MODEL_ID = \"google/vaultgemma-1b\"\n",
+    "HUGGING_FACE_HUB_TOKEN = \"<REPLACE WITH TOKEN>\"\n",
+    "\n",
+    "# 'google/vaultgemma-1b' -> base_name 'vaultgemma-1b', model_lineage 'google'\n",
+    "base_name = HF_MODEL_ID.split('/')[-1].replace('.', '-').lower()\n",
+    "model_lineage = HF_MODEL_ID.split('/')[0]\n",
+    "base_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "af864803-2921-4dda-95d7-6177b7d720ec",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:07:30.037543Z",
+     "iopub.status.busy": "2025-09-15T19:07:30.037241Z",
+     "iopub.status.idle": "2025-09-15T19:07:30.041271Z",
+     "shell.execute_reply": "2025-09-15T19:07:30.040218Z",
+     "shell.execute_reply.started": "2025-09-15T19:07:30.037519Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Endpoint sizing passed to huggingface_model.deploy() further down.\n",
+    "instance_type = \"ml.m5.xlarge\"\n",
+    "instance_count = 1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "\n",
+    "#### HUGGING_FACE_HUB_TOKEN \n",
+    "VaultGemma-1B is a gated model. Therefore, if you deploy model files hosted on the Hub, you need to provide your HuggingFace token as an environment variable. This enables SageMaker AI to download the files at runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "2e196ece-bada-486a-8f20-b78a381de41b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:13:36.536292Z",
+     "iopub.status.busy": "2025-09-15T19:13:36.535990Z",
+     "iopub.status.idle": "2025-09-15T19:13:36.714432Z",
+     "shell.execute_reply": "2025-09-15T19:13:36.713736Z",
+     "shell.execute_reply.started": "2025-09-15T19:13:36.536270Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "\n",
+    "# Hub Model configuration. https://huggingface.co/models\n",
+    "# Reuse HF_MODEL_ID from the configuration cell so the model id is defined once.\n",
+    "hub = {\n",
+    "    'HF_MODEL_ID': HF_MODEL_ID,\n",
+    "    'HF_TASK': 'text-generation',\n",
+    "    'HF_TOKEN': HUGGING_FACE_HUB_TOKEN\n",
+    "}\n",
+    "\n",
+    "# create Hugging Face Model Class\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=hub,\n",
+    "    role=role,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (M5 instance)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.m5.xlarge` (general-purpose CPU instance)\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "786fa8a9-d635-412f-adc5-4941a80b2b72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# Create the real-time endpoint with the sizing chosen in the configuration cell.\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    initial_instance_count=instance_count,\n",
+    "    instance_type=instance_type,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc38898a-290b-482b-968c-f04b8674300c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Send a single text prompt to the deployed endpoint.\n",
+    "demo_prompt = \"Can you please let us know more details about your training using differential privacy?\"\n",
+    "predictor.predict({\"inputs\": demo_prompt})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "# Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Delete the endpoint and its endpoint config first (the endpoint is what keeps\n",
+    "# the instance running), then remove the model.\n",
+    "predictor.delete_endpoint(delete_endpoint_config=True)\n",
+    "huggingface_model.delete_model()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/01-models/google/Vault-Gemma/Vault-Gemma-1B.ipynb b/01-models/google/Vault-Gemma/Vault-Gemma-1B.ipynb
new file mode 100644
index 0000000..2b01394
--- /dev/null
+++ b/01-models/google/Vault-Gemma/Vault-Gemma-1B.ipynb
@@ -0,0 +1,533 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# How to deploy VaultGemma 1B for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the Vault Gemma 1B model (HuggingFace model ID: [google/vaultgemma-1b](https://huggingface.co/google/vaultgemma-1b)) using Amazon SageMaker AI. \n",
+    "\n",
+    "VaultGemma is a variant of the Gemma family of lightweight, state-of-the-art open models from Google. It is pre-trained from the ground up using Differential Privacy (DP). This provides strong, mathematically-backed privacy guarantees for its training data, limiting the extent to which the model's outputs can reveal information about any single training example.\n",
+    "\n",
+    "VaultGemma uses a similar architecture as Gemma 2. VaultGemma is a pretrained model that can be instruction tuned for a variety of language understanding and generation tasks. Its relatively small size (< 1B parameters) makes it possible to deploy in environments with limited resources, democratizing access to state-of-the-art AI models that are built with privacy at their core.\n",
+    "\n",
+    "### License agreement\n",
+    "* This model is gated on HuggingFace, please refer to the original [model card](https://huggingface.co/google/vaultgemma-1b) for license.\n",
+    "* This notebook is a sample notebook and not intended for production use.\n",
+    "\n",
+    "### Execution environment setup\n",
+    "This notebook requires the following third-party Python dependencies:\n",
+    "* AWS [`sagemaker`](https://sagemaker.readthedocs.io/en/stable/index.html) with a version greater than or equal to 2.242.0\n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -Uq huggingface_hub sagemaker transformers==4.57.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T20:33:42.848333Z",
+     "iopub.status.busy": "2025-10-28T20:33:42.848186Z",
+     "iopub.status.idle": "2025-10-28T20:33:42.851101Z",
+     "shell.execute_reply": "2025-10-28T20:33:42.850698Z",
+     "shell.execute_reply.started": "2025-10-28T20:33:42.848317Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.253.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import datetime\n",
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import json\n",
+    "import time\n",
+    "import shutil\n",
+    "import tarfile\n",
+    "\n",
+    "import sagemaker\n",
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "from huggingface_hub import snapshot_download\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T20:38:20.172482Z",
+     "iopub.status.busy": "2025-10-28T20:38:20.172255Z",
+     "iopub.status.idle": "2025-10-28T20:38:20.624643Z",
+     "shell.execute_reply": "2025-10-28T20:38:20.624085Z",
+     "shell.execute_reply.started": "2025-10-28T20:38:20.172467Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "google-vaultgemma-1b-endpoint-1761683900-420591\n",
+      "Saving model artifacts to sagemaker-us-east-1-329542461890/models/google_vaultgemma-1b\n"
+     ]
+    }
+   ],
+   "source": [
+    "session = sagemaker.Session()\n",
+    "role = sagemaker.get_execution_role()\n",
+    "\n",
+    "instance_type = \"ml.m5.xlarge\"\n",
+    "instance_count = 1\n",
+    "\n",
+    "# Prefer a token from the environment so it is never saved in the notebook;\n",
+    "# otherwise replace the placeholder by hand.\n",
+    "HUGGING_FACE_HUB_TOKEN = os.environ.get(\"HF_TOKEN\", \"<REPLACE_ME>\")\n",
+    "model_id = \"google/vaultgemma-1b\"\n",
+    "model_id_filesafe = model_id.replace(\"/\", \"_\").replace(\".\", \"_\")\n",
+    "\n",
+    "# Build the pieces separately: nesting double quotes inside a double-quoted\n",
+    "# f-string only parses on Python 3.12+.\n",
+    "endpoint_slug = model_id_filesafe.replace(\"_\", \"-\")\n",
+    "endpoint_timestamp = str(datetime.datetime.now().timestamp()).replace(\".\", \"-\")\n",
+    "endpoint_name = f\"{endpoint_slug}-endpoint-{endpoint_timestamp}\"\n",
+    "print(endpoint_name)\n",
+    "\n",
+    "base_name = model_id.split('/')[-1].replace('.', '-').lower()\n",
+    "model_lineage = model_id.split('/')[0]\n",
+    "\n",
+    "# Fall back to models/<model id> when the session has no default bucket prefix.\n",
+    "bucket_name = session.default_bucket()\n",
+    "default_prefix = session.default_bucket_prefix or f\"models/{model_id_filesafe}\"\n",
+    "print(f\"Saving model artifacts to {bucket_name}/{default_prefix}\")\n",
+    "\n",
+    "# Directory that the %%writefile cells below fill with inference code.\n",
+    "os.makedirs(\"code\", exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51c1ee59-9644-42d3-8f3f-45dd7e8c124d",
+   "metadata": {},
+   "source": [
+    "### Local Model Test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "d8b33210-6e04-4885-ad35-513f257124d9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T20:42:00.175305Z",
+     "iopub.status.busy": "2025-10-28T20:42:00.175098Z",
+     "iopub.status.idle": "2025-10-28T20:42:00.193771Z",
+     "shell.execute_reply": "2025-10-28T20:42:00.193345Z",
+     "shell.execute_reply.started": "2025-10-28T20:42:00.175290Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Log in to the Hugging Face Hub with the token from the configuration cell.\n",
+    "from huggingface_hub import login\n",
+    "login(HUGGING_FACE_HUB_TOKEN)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "7f67cb1f-77b4-4348-884e-cccf5a4f845a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-10-28T20:42:04.762066Z",
+     "iopub.status.busy": "2025-10-28T20:42:04.761680Z",
+     "iopub.status.idle": "2025-10-28T20:42:17.726388Z",
+     "shell.execute_reply": "2025-10-28T20:42:17.725770Z",
+     "shell.execute_reply.started": "2025-10-28T20:42:04.762034Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "40cb769b1a894fc580d2b283d11ed516",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/1.37k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-10-28 20:42:07.834777: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+      "E0000 00:00:1761684127.844092   65037 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "E0000 00:00:1761684127.847260   65037 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2025-10-28 20:42:07.857283: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e2af8604482e40b8bb00f613106f3df0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/2.08G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cd6d67318675442491d3cf9f4a19f642",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Explain in simple terms how differential privacy works.\n",
+      "\n",
+      "Explain in simple terms how differential privacy works.\n",
+      "\n",
+      "The following is a list of some of the functions of the nervous system:\n",
+      "\n",
+      "$\\begin{array}{lll} \\text { (a) } & \\text { muscle } & \\text { (b) } & \\text { nerve } \\\\ \\text { (c) } & \\text { gland } & \\text { (d) } & \\text { muscle } \\\\ \\text { (e) } & \\text { gland } & \\text { (f) } & \\text { gland } \\\\ \\text { (g) } & \\text { gland } & \\text { (h) } & \\text { gland } \\\\ \\text { (\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "\n",
+    "# Load tokenizer and model from the Hub for a quick local smoke test.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    \"google/vaultgemma-1b\",\n",
+    "    trust_remote_code=True,\n",
+    "    token=HUGGING_FACE_HUB_TOKEN,\n",
+    ")\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    \"google/vaultgemma-1b\",\n",
+    "    device_map=\"auto\",\n",
+    "    dtype=\"auto\",\n",
+    ")\n",
+    "\n",
+    "prompt_text = \"Explain in simple terms how differential privacy works.\"\n",
+    "# Tokenize the prompt and move the tensors to the device the model was placed on.\n",
+    "input_ids = tokenizer(prompt_text, return_tensors=\"pt\").to(model.device)\n",
+    "\n",
+    "# Sampling is enabled, so the generated text differs from run to run.\n",
+    "generated_outputs = model.generate(**input_ids, max_new_tokens=150, do_sample=True, top_p=0.9, temperature=0.8)\n",
+    "\n",
+    "response = tokenizer.decode(generated_outputs[0], skip_special_tokens=True)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "\n",
+    "#### HUGGING_FACE_HUB_TOKEN \n",
+    "VaultGemma-1B is a gated model. Therefore, if you deploy model files hosted on the Hub, you need to provide your HuggingFace token as an environment variable. This enables SageMaker AI to download the files at runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98e70481-6eff-45d2-90a3-58d6736b67fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Environment variables handed to the serving container.\n",
+    "env = {\n",
+    "    'HF_MODEL_ID': model_id,\n",
+    "    'HF_TOKEN': HUGGING_FACE_HUB_TOKEN,\n",
+    "    # VaultGemma is loaded with AutoModelForCausalLM, i.e. a text-only model.\n",
+    "    'HF_TASK': 'text-generation',\n",
+    "    'SM_NUM_GPUS': json.dumps(1),\n",
+    "    'OPTION_TRUST_REMOTE_CODE': 'true',\n",
+    "    'OPTION_MODEL_LOADING_TIMEOUT': '3600',\n",
+    "    \"OPTION_ROLLING_BATCH\": \"vllm\",\n",
+    "    \"OPTION_TENSOR_PARALLEL_DEGREE\": \"1\",\n",
+    "    \"OPTION_MAX_MODEL_LEN\": \"5000\",\n",
+    "    \"OPTION_ASYNC_MODE\": \"true\",\n",
+    "    \"SERVING_FAIL_FAST\": \"true\",\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "954728b5-14c7-4ce0-9d3a-0ec1eab4414f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile code/requirements.txt\n",
+    "transformers==4.57.0\n",
+    "huggingface_hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df67b145-1667-416e-b435-3a25c6384574",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile code/inference.py\n",
+    "import logging\n",
+    "\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "\n",
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.INFO)\n",
+    "\n",
+    "# Prompt and sampling settings used when the request does not supply its own.\n",
+    "DEFAULT_PROMPT = \"Explain in simple terms how differential privacy works.\"\n",
+    "DEFAULT_GENERATION_KWARGS = {\n",
+    "    \"max_new_tokens\": 150,\n",
+    "    \"do_sample\": True,\n",
+    "    \"top_p\": 0.9,\n",
+    "    \"temperature\": 0.8,\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def model_fn(model_dir):\n",
+    "    \"\"\"Load the tokenizer and causal LM stored in model_dir.\n",
+    "\n",
+    "    Args:\n",
+    "        model_dir: Directory containing the model and tokenizer files.\n",
+    "\n",
+    "    Returns:\n",
+    "        Dict with the keys \"tokenizer\" and \"model\".\n",
+    "    \"\"\"\n",
+    "    tokenizer = AutoTokenizer.from_pretrained(model_dir)\n",
+    "    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map=\"auto\", dtype=\"auto\")\n",
+    "    return {\"tokenizer\": tokenizer, \"model\": model}\n",
+    "\n",
+    "\n",
+    "def predict_fn(data, model_obj):\n",
+    "    \"\"\"Generate text for the prompt carried by the request.\n",
+    "\n",
+    "    Args:\n",
+    "        data: Request payload. A dict is read as {\"inputs\": <prompt>,\n",
+    "            \"parameters\": {<generate kwargs>}}; any other value is used as the\n",
+    "            prompt itself. A missing or empty prompt falls back to DEFAULT_PROMPT.\n",
+    "        model_obj: The dict returned by model_fn.\n",
+    "\n",
+    "    Returns:\n",
+    "        The decoded output of model.generate as a string.\n",
+    "    \"\"\"\n",
+    "    tokenizer = model_obj[\"tokenizer\"]\n",
+    "    model = model_obj[\"model\"]\n",
+    "\n",
+    "    payload = data if isinstance(data, dict) else {\"inputs\": data}\n",
+    "    prompt_text = payload.get(\"inputs\") or DEFAULT_PROMPT\n",
+    "    # Request parameters override the defaults key by key.\n",
+    "    generation_kwargs = {**DEFAULT_GENERATION_KWARGS, **(payload.get(\"parameters\") or {})}\n",
+    "\n",
+    "    inputs = tokenizer(prompt_text, return_tensors=\"pt\").to(model.device)\n",
+    "    generated_outputs = model.generate(**inputs, **generation_kwargs)\n",
+    "\n",
+    "    return tokenizer.decode(generated_outputs[0], skip_special_tokens=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14da1022-ddfd-4e67-b51c-5db9eadf255e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def filter_function(tarinfo):\n",
+    "    \"\"\"Tarfile filter that drops Hugging Face cache and git metadata entries.\n",
+    "\n",
+    "    Args:\n",
+    "        tarinfo: The tarfile.TarInfo entry about to be added to the archive.\n",
+    "\n",
+    "    Returns:\n",
+    "        None to exclude the entry, otherwise the unchanged tarinfo.\n",
+    "    \"\"\"\n",
+    "    from pathlib import PurePosixPath\n",
+    "\n",
+    "    # Compare whole path components so a file such as 'tokenizer.cache.json'\n",
+    "    # is not dropped just because its name contains '.cache'.\n",
+    "    excluded_names = {'.cache', '.gitattributes'}\n",
+    "    if excluded_names.intersection(PurePosixPath(tarinfo.name).parts):\n",
+    "        return None\n",
+    "    return tarinfo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "301cd83e-da6f-4091-9f54-4b20cc982d4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s3_client = boto3.client('s3')\n",
+    "key = f\"{default_prefix}/model.tar.gz\"\n",
+    "force_rebuild_tarball = True\n",
+    "\n",
+    "# head_object raises ClientError for a missing key instead of returning a\n",
+    "# falsy value, so the existence check has to catch the error explicitly.\n",
+    "tarball_in_s3 = False\n",
+    "if not force_rebuild_tarball:\n",
+    "    try:\n",
+    "        s3_client.head_object(Bucket=bucket_name, Key=key)\n",
+    "        tarball_in_s3 = True\n",
+    "    except s3_client.exceptions.ClientError as err:\n",
+    "        # Only a missing object means \"rebuild\"; surface anything else (e.g. 403).\n",
+    "        if err.response[\"Error\"][\"Code\"] not in (\"404\", \"NoSuchKey\", \"NotFound\"):\n",
+    "            raise\n",
+    "\n",
+    "if not tarball_in_s3:\n",
+    "    try:\n",
+    "        model_path = snapshot_download(repo_id=model_id, local_dir=\"./model\")\n",
+    "        print(f\"Successfully downloaded to {model_path}\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Failed to download after retries: {str(e)}\")\n",
+    "        # Re-raise: without model_path the tarball step below cannot run.\n",
+    "        raise\n",
+    "    \n",
+    "    print(\"Building gzipped tarball...\")\n",
+    "    with tarfile.open(\"./model.tar.gz\", \"w:gz\") as tar:\n",
+    "        # Model files go to the archive root; ./code keeps its directory name.\n",
+    "        tar.add(model_path, arcname=\".\", filter=filter_function)\n",
+    "        tar.add(\"./code\", filter=filter_function)\n",
+    "    print(\"Successfully tarred the ball.\")\n",
+    "    \n",
+    "    print(f\"Uploading tarball to {bucket_name}/{default_prefix}...\")\n",
+    "    s3_client.upload_file(\"./model.tar.gz\", bucket_name, key)\n",
+    "    # Local artifacts are kept; uncomment to remove them after the upload.\n",
+    "    # shutil.rmtree(\"./model\")\n",
+    "    # os.remove(\"./model.tar.gz\")\n",
+    "    print(\"Successfully uploaded (local ./model and ./model.tar.gz were kept)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (M5 instance)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.m5.xlarge` (general-purpose CPU instance)\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "794b5a0a-bfb4-4659-962b-9523ce38de1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the Hugging Face model from the tarball uploaded to S3 above.\n",
+    "# The container is configured through `env`; model files come from model_data.\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=env,\n",
+    "    role=role,\n",
+    "    # inference.py was written to ./code, so entry_point must be resolved there.\n",
+    "    source_dir=\"code\",\n",
+    "    entry_point=\"inference.py\",\n",
+    "    enable_network_isolation=False\n",
+    ")\n",
+    "\n",
+    "# Deploy with the sizing and endpoint name chosen in the configuration cell.\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    initial_instance_count=instance_count,\n",
+    "    instance_type=instance_type,\n",
+    "    endpoint_name=endpoint_name\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e196ece-bada-486a-8f20-b78a381de41b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Using DJL Serving\n",
+    "# UNDER CONSTRUCTION\n",
+    "# %%time\n",
+    "\n",
+    "# image_uri = \"763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128-v1.2\"\n",
+    "# model = HuggingFaceModel(\n",
+    "#     model_data=f\"s3://{bucket_name}/{default_prefix}/model.tar.gz\",\n",
+    "#     image_uri=image_uri,\n",
+    "#     env=env,\n",
+    "#     role=role,\n",
+    "#     entry_point=\"inference.py\",\n",
+    "#     enable_network_isolation=False\n",
+    "# )\n",
+    "\n",
+    "# predictor = model.deploy(\n",
+    "#     initial_instance_count=instance_count,\n",
+    "#     instance_type=instance_type,\n",
+    "#     endpoint_name=endpoint_name\n",
+    "# )\n",
+    "\n",
+    "# predictor.predict({\n",
+    "# \t\"inputs\": \"Can you please let us know more details about your training using differential privacy?\",\n",
+    "# })"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "# Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tear down the endpoint together with its endpoint config, then the model.\n",
+    "predictor.delete_endpoint(delete_endpoint_config=True)\n",
+    "huggingface_model.delete_model()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb b/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb
new file mode 100644
index 0000000..cd7a1e9
--- /dev/null
+++ b/Vault-Gemma/.ipynb_checkpoints/Vault-Gemma-1B-checkpoint.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# How to deploy the VaultGemma 1B model for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the VaultGemma 1B model (Hugging Face model ID: [google/vaultgemma-1b](https://huggingface.co/google/vaultgemma-1b)) using Amazon SageMaker AI.\n",
+    "\n",
+    "VaultGemma is a variant of the Gemma family of lightweight, state-of-the-art open models from Google. It is pre-trained from the ground up using Differential Privacy (DP). This provides strong, mathematically-backed privacy guarantees for its training data, limiting the extent to which the model's outputs can reveal information about any single training example.\n",
+    "\n",
+    "VaultGemma uses an architecture similar to that of Gemma 2. It is a pretrained model that can be instruction-tuned for a variety of language understanding and generation tasks. Its relatively small size (< 1B parameters) makes it possible to deploy in environments with limited resources, democratizing access to state-of-the-art AI models that are built with privacy at their core.\n",
+    "\n",
+    "### License agreement\n",
+    "* This model is gated on Hugging Face; please refer to the original [model card](https://huggingface.co/google/vaultgemma-1b) for the license terms.\n",
+    "* This notebook is a sample notebook and not intended for production use.\n",
+    "\n",
+    "### Execution environment setup\n",
+    "This notebook requires the following third-party Python dependencies:\n",
+    "* AWS [`sagemaker`](https://sagemaker.readthedocs.io/en/stable/index.html) with a version greater than or equal to 2.242.0\n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:31.124480Z",
+     "iopub.status.busy": "2025-09-15T19:04:31.124212Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.189319Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.188352Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:31.124456Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "autogluon-multimodal 1.4.0 requires nvidia-ml-py3<8.0,>=7.352.0, which is not installed.\n",
+      "aiobotocore 2.21.1 requires botocore<1.37.2,>=1.37.0, but you have botocore 1.40.30 which is incompatible.\n",
+      "autogluon-multimodal 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "autogluon-timeseries 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "sagemaker-studio-analytics-extension 0.2.0 requires sparkmagic==0.22.0, but you have sparkmagic 0.21.0 which is incompatible.\n",
+      "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -Uq \"sagemaker>=2.242.0,<3\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.191175Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.190795Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.196080Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.195223Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.191132Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.245.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import time\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.197423Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.197103Z",
+     "iopub.status.idle": "2025-09-15T19:04:38.010473Z",
+     "shell.execute_reply": "2025-09-15T19:04:38.009714Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.197392Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    role = sagemaker.get_execution_role()\n",
+    "except ValueError:\n",
+    "    # get_execution_role() raised: fall back to resolving the role ARN by name through IAM.\n",
+    "    iam = boto3.client('iam')\n",
+    "    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']\n",
+    "sagemaker_session = sagemaker.Session()  # created on both paths, not only when the try branch succeeds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7e6b15cd-34e5-45f4-a68a-728aa9cc930d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:09:45.382881Z",
+     "iopub.status.busy": "2025-09-15T19:09:45.382602Z",
+     "iopub.status.idle": "2025-09-15T19:09:45.388802Z",
+     "shell.execute_reply": "2025-09-15T19:09:45.387863Z",
+     "shell.execute_reply.started": "2025-09-15T19:09:45.382860Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'vaultgemma-1b'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "HF_MODEL_ID = \"google/vaultgemma-1b\"\n",
+    "HUGGING_FACE_HUB_TOKEN = \"<REPLACE WITH TOKEN>\"\n",
+    "id_parts = HF_MODEL_ID.split('/')  # first segment: owning org, last segment: model name\n",
+    "model_lineage = id_parts[0]\n",
+    "base_name = id_parts[-1].replace('.', '-').lower()\n",
+    "base_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "af864803-2921-4dda-95d7-6177b7d720ec",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:07:30.037543Z",
+     "iopub.status.busy": "2025-09-15T19:07:30.037241Z",
+     "iopub.status.idle": "2025-09-15T19:07:30.041271Z",
+     "shell.execute_reply": "2025-09-15T19:07:30.040218Z",
+     "shell.execute_reply.started": "2025-09-15T19:07:30.037519Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "instance_count = 1  # number of instances behind the endpoint\n",
+    "instance_type = \"ml.m5.xlarge\"  # instance type used to host the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "\n",
+    "#### HUGGING_FACE_HUB_TOKEN \n",
+    "VaultGemma-1B is a gated model. Therefore, if you deploy model files hosted on the Hub, you need to provide your Hugging Face token as an environment variable (`HF_TOKEN` in the next cell). This enables SageMaker AI to download the files at runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "2e196ece-bada-486a-8f20-b78a381de41b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:13:36.536292Z",
+     "iopub.status.busy": "2025-09-15T19:13:36.535990Z",
+     "iopub.status.idle": "2025-09-15T19:13:36.714432Z",
+     "shell.execute_reply": "2025-09-15T19:13:36.713736Z",
+     "shell.execute_reply.started": "2025-09-15T19:13:36.536270Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "\n",
+    "# Container environment: the Hub model to pull, the pipeline task, and the token for the gated repo.\n",
+    "hub = {\n",
+    "    'HF_MODEL_ID': HF_MODEL_ID,  # reuse the id from the configuration cell instead of repeating the literal\n",
+    "    'HF_TASK': 'text-generation',\n",
+    "    'HF_TOKEN': HUGGING_FACE_HUB_TOKEN,\n",
+    "}\n",
+    "\n",
+    "# NOTE(review): confirm the transformers 4.49.0 container can load the VaultGemma architecture.\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=hub,\n",
+    "    role=role,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (M5 instance)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.m5.xlarge`, a general-purpose CPU instance (4 vCPUs, 16 GiB memory); this small model does not require a GPU instance\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "786fa8a9-d635-412f-adc5-4941a80b2b72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# Create the SageMaker endpoint for the model and keep the returned predictor for invocations.\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    instance_type=instance_type,  # instance type the endpoint runs on\n",
+    "    initial_instance_count=instance_count,  # how many instances to start with\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc38898a-290b-482b-968c-f04b8674300c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Send a single text prompt to the endpoint and display the response.\n",
+    "request_payload = {\"inputs\": \"Can you please let us know more details about your training using differential privacy?\"}\n",
+    "predictor.predict(request_payload)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "## Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predictor.delete_endpoint(delete_endpoint_config=True)\n", "huggingface_model.delete_model()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Vault-Gemma/Vault-Gemma-1B.ipynb b/Vault-Gemma/Vault-Gemma-1B.ipynb
new file mode 100644
index 0000000..cd7a1e9
--- /dev/null
+++ b/Vault-Gemma/Vault-Gemma-1B.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "94d68fb3-00f2-447c-b5e0-28fa18562392",
+   "metadata": {},
+   "source": [
+    "# How to deploy the VaultGemma 1B model for inference using Amazon SageMaker AI\n",
+    "**Recommended kernel(s):** This notebook can be run with any Amazon SageMaker Studio kernel.\n",
+    "\n",
+    "In this notebook, you will learn how to deploy the VaultGemma 1B model (Hugging Face model ID: [google/vaultgemma-1b](https://huggingface.co/google/vaultgemma-1b)) using Amazon SageMaker AI.\n",
+    "\n",
+    "VaultGemma is a variant of the Gemma family of lightweight, state-of-the-art open models from Google. It is pre-trained from the ground up using Differential Privacy (DP). This provides strong, mathematically-backed privacy guarantees for its training data, limiting the extent to which the model's outputs can reveal information about any single training example.\n",
+    "\n",
+    "VaultGemma uses an architecture similar to that of Gemma 2. It is a pretrained model that can be instruction-tuned for a variety of language understanding and generation tasks. Its relatively small size (< 1B parameters) makes it possible to deploy in environments with limited resources, democratizing access to state-of-the-art AI models that are built with privacy at their core.\n",
+    "\n",
+    "### License agreement\n",
+    "* This model is gated on Hugging Face; please refer to the original [model card](https://huggingface.co/google/vaultgemma-1b) for the license terms.\n",
+    "* This notebook is a sample notebook and not intended for production use.\n",
+    "\n",
+    "### Execution environment setup\n",
+    "This notebook requires the following third-party Python dependencies:\n",
+    "* AWS [`sagemaker`](https://sagemaker.readthedocs.io/en/stable/index.html) with a version greater than or equal to 2.242.0\n",
+    "\n",
+    "Let's install or upgrade these dependencies using the following command:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "fed1c92f-9fbc-47c9-b940-7dd07f962ef8",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:31.124480Z",
+     "iopub.status.busy": "2025-09-15T19:04:31.124212Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.189319Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.188352Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:31.124456Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "autogluon-multimodal 1.4.0 requires nvidia-ml-py3<8.0,>=7.352.0, which is not installed.\n",
+      "aiobotocore 2.21.1 requires botocore<1.37.2,>=1.37.0, but you have botocore 1.40.30 which is incompatible.\n",
+      "autogluon-multimodal 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "autogluon-timeseries 1.4.0 requires transformers[sentencepiece]<4.50,>=4.38.0, but you have transformers 4.57.0.dev0 which is incompatible.\n",
+      "sagemaker-studio-analytics-extension 0.2.0 requires sparkmagic==0.22.0, but you have sparkmagic 0.21.0 which is incompatible.\n",
+      "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.3.1 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -Uq \"sagemaker>=2.242.0,<3\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc56cad7-ea97-43c4-835d-a56729549023",
+   "metadata": {},
+   "source": [
+    "### Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c128cf97-3f1f-4176-87b4-1193cd9e9c0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.191175Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.190795Z",
+     "iopub.status.idle": "2025-09-15T19:04:37.196080Z",
+     "shell.execute_reply": "2025-09-15T19:04:37.195223Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.191132Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.245.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sagemaker\n",
+    "import boto3\n",
+    "import logging\n",
+    "import time\n",
+    "from sagemaker.session import Session\n",
+    "from sagemaker.s3 import S3Uploader\n",
+    "\n",
+    "print(sagemaker.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bba6d4bf-5df9-4e3e-8a9e-a03897e0cb0f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:04:37.197423Z",
+     "iopub.status.busy": "2025-09-15T19:04:37.197103Z",
+     "iopub.status.idle": "2025-09-15T19:04:38.010473Z",
+     "shell.execute_reply": "2025-09-15T19:04:38.009714Z",
+     "shell.execute_reply.started": "2025-09-15T19:04:37.197392Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    role = sagemaker.get_execution_role()\n",
+    "except ValueError:\n",
+    "    # get_execution_role() raised: fall back to resolving the role ARN by name through IAM.\n",
+    "    iam = boto3.client('iam')\n",
+    "    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']\n",
+    "sagemaker_session = sagemaker.Session()  # created on both paths, not only when the try branch succeeds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7e6b15cd-34e5-45f4-a68a-728aa9cc930d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:09:45.382881Z",
+     "iopub.status.busy": "2025-09-15T19:09:45.382602Z",
+     "iopub.status.idle": "2025-09-15T19:09:45.388802Z",
+     "shell.execute_reply": "2025-09-15T19:09:45.387863Z",
+     "shell.execute_reply.started": "2025-09-15T19:09:45.382860Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'vaultgemma-1b'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "HF_MODEL_ID = \"google/vaultgemma-1b\"\n",
+    "HUGGING_FACE_HUB_TOKEN = \"<REPLACE WITH TOKEN>\"\n",
+    "id_parts = HF_MODEL_ID.split('/')  # first segment: owning org, last segment: model name\n",
+    "model_lineage = id_parts[0]\n",
+    "base_name = id_parts[-1].replace('.', '-').lower()\n",
+    "base_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "af864803-2921-4dda-95d7-6177b7d720ec",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:07:30.037543Z",
+     "iopub.status.busy": "2025-09-15T19:07:30.037241Z",
+     "iopub.status.idle": "2025-09-15T19:07:30.041271Z",
+     "shell.execute_reply": "2025-09-15T19:07:30.040218Z",
+     "shell.execute_reply.started": "2025-09-15T19:07:30.037519Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "instance_count = 1  # number of instances behind the endpoint\n",
+    "instance_type = \"ml.m5.xlarge\"  # instance type used to host the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7245e684-aa6e-44ca-9451-10e0bee4b96e",
+   "metadata": {},
+   "source": [
+    "## Create SageMaker Model\n",
+    "\n",
+    "#### HUGGING_FACE_HUB_TOKEN \n",
+    "VaultGemma-1B is a gated model. Therefore, if you deploy model files hosted on the Hub, you need to provide your Hugging Face token as an environment variable (`HF_TOKEN` in the next cell). This enables SageMaker AI to download the files at runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "2e196ece-bada-486a-8f20-b78a381de41b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:13:36.536292Z",
+     "iopub.status.busy": "2025-09-15T19:13:36.535990Z",
+     "iopub.status.idle": "2025-09-15T19:13:36.714432Z",
+     "shell.execute_reply": "2025-09-15T19:13:36.713736Z",
+     "shell.execute_reply.started": "2025-09-15T19:13:36.536270Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sagemaker.huggingface import HuggingFaceModel\n",
+    "\n",
+    "# Container environment: the Hub model to pull, the pipeline task, and the token for the gated repo.\n",
+    "hub = {\n",
+    "    'HF_MODEL_ID': HF_MODEL_ID,  # reuse the id from the configuration cell instead of repeating the literal\n",
+    "    'HF_TASK': 'text-generation',\n",
+    "    'HF_TOKEN': HUGGING_FACE_HUB_TOKEN,\n",
+    "}\n",
+    "\n",
+    "# NOTE(review): confirm the transformers 4.49.0 container can load the VaultGemma architecture.\n",
+    "huggingface_model = HuggingFaceModel(\n",
+    "    transformers_version='4.49.0',\n",
+    "    pytorch_version='2.6.0',\n",
+    "    py_version='py312',\n",
+    "    env=hub,\n",
+    "    role=role,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40af4b17-30e9-4138-93f3-0589e4770815",
+   "metadata": {},
+   "source": [
+    "## Deploy Model to SageMaker Endpoint\n",
+    "\n",
+    "Now we'll deploy our model to a SageMaker endpoint for real-time inference. This is a significant step that:\n",
+    "1. Provisions the specified compute resources (M5 instance)\n",
+    "2. Deploys the model container\n",
+    "3. Sets up the endpoint for API access\n",
+    "\n",
+    "### Deployment Configuration\n",
+    "- **Instance Count**: 1 instance for single-node deployment\n",
+    "- **Instance Type**: `ml.m5.xlarge`, a general-purpose CPU instance (4 vCPUs, 16 GiB memory); this small model does not require a GPU instance\n",
+    "\n",
+    "> ⚠️ **Important**: \n",
+    "> - Deployment can take up to 15 minutes\n",
+    "> - Monitor the CloudWatch logs for progress"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "786fa8a9-d635-412f-adc5-4941a80b2b72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# Create the SageMaker endpoint for the model and keep the returned predictor for invocations.\n",
+    "predictor = huggingface_model.deploy(\n",
+    "    instance_type=instance_type,  # instance type the endpoint runs on\n",
+    "    initial_instance_count=instance_count,  # how many instances to start with\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc38898a-290b-482b-968c-f04b8674300c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Send a single text prompt to the endpoint and display the response.\n",
+    "request_payload = {\"inputs\": \"Can you please let us know more details about your training using differential privacy?\"}\n",
+    "predictor.predict(request_payload)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7b85c32-d4df-45f1-84f4-86a296ad1c0d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-09-15T19:14:57.135928Z",
+     "iopub.status.busy": "2025-09-15T19:14:57.135661Z",
+     "iopub.status.idle": "2025-09-15T19:14:57.139468Z",
+     "shell.execute_reply": "2025-09-15T19:14:57.138566Z",
+     "shell.execute_reply.started": "2025-09-15T19:14:57.135907Z"
+    }
+   },
+   "source": [
+    "## Clean up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcef37c7-6b47-4c3b-b6b7-b266245b492e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predictor.delete_endpoint(delete_endpoint_config=True)\n", "huggingface_model.delete_model()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}