diff --git a/.gitignore b/.gitignore index 7c8ea71df..5639e9f97 100644 --- a/.gitignore +++ b/.gitignore @@ -71,10 +71,6 @@ instance/ # Scrapy stuff: .scrapy -# Sphinx documentation -docs/_build/ -docs/source/getting_started/examples/ - # PyBuilder .pybuilder/ target/ @@ -144,6 +140,7 @@ venv.bak/ # mkdocs documentation /site +docs/examples # mypy .mypy_cache/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 00ed22e59..853ca0bda 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,11 +8,8 @@ build: tools: python: "3.12" -sphinx: - configuration: docs/source/conf.py - fail_on_warning: true -# If using Sphinx, optionally build your docs in additional formats such as PDF -formats: [] +mkdocs: + configuration: mkdocs.yaml # Optionally declare the Python requirements required to build your docs python: diff --git a/docs/.nav.yml b/docs/.nav.yml new file mode 100644 index 000000000..bba135d64 --- /dev/null +++ b/docs/.nav.yml @@ -0,0 +1,28 @@ +nav: + - Home: + - vLLM Spyre Plugin: README.md + - Getting Started: + - Installation: getting_started/installation.md + - Deploying: + - Docker: deploying/docker.md + - Kubernetes: deploying/k8s.md + - Examples: + - Offline Inference: examples/offline_inference + - Other: examples/other + - User Guide: + - Configuration: user_guide/configuration.md + - Environment Variables: user_guide/env_vars.md + - Supported Features: user_guide/supported_features.md + - Supported Models: user_guide/supported_models.md + - Developer Guide: + - Contributing: contributing/README.md + + - Getting Started: + - Installation: getting_started/installation.md + - User Guide: + - Configuration: user_guide/configuration.md + - Environment Variables: user_guide/env_vars.md + - Supported Features: user_guide/supported_features.md + - Supported Models: user_guide/supported_models.md + - Developer Guide: + - Contributing: contributing/README.md diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 92dd33a1a..000000000 --- 
a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md index 0de4c9c4a..57fc2ab86 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,22 +1,18 @@ -# vLLM Spyre Plugin docs +# Welcome to the vLLM Spyre Plugin -Live doc: [vllm-spyre.readthedocs.io](https://vllm-spyre.readthedocs.io) +

+ +Star +Watch +Fork +

-## Build the docs +**IBM Spyre** is the first production-grade Artificial Intelligence Unit (AIU) accelerator born out of the IBM Research AIU family, and is part of a long-term strategy of developing novel architectures and full-stack technology solutions for the emerging space of generative AI. Spyre builds on the foundation of IBM’s internal AIU research and delivers a scalable, efficient architecture for accelerating AI in enterprise environments. -```bash -# Install dependencies. -pip install -r requirements-docs.txt +The vLLM Spyre plugin (`vllm-spyre`) is a dedicated backend extension that enables seamless integration of IBM Spyre Accelerator with vLLM. It follows the architecture described in [vLLM's Plugin System](https://docs.vllm.ai/en/latest/design/plugin_system.html), making it easy to integrate IBM's advanced AI acceleration into existing vLLM workflows. -# Build the docs. -make clean -make html -``` +For more information, check out the following: -## Open the docs with your browser - -```bash -python -m http.server -d _build/html/ -``` - -Launch your browser and open [localhost:8000](http://localhost:8000/). +- 📚 [Meet the IBM Artificial Intelligence Unit](https://research.ibm.com/blog/ibm-artificial-intelligence-unit-aiu) +- 📽️ [AI Accelerators: Transforming Scalability & Model Efficiency](https://www.youtube.com/watch?v=KX0qBM-ByAg) +- 🚀 [Spyre Accelerator for IBM Z](https://research.ibm.com/blog/spyre-for-z) diff --git a/docs/contributing/README.md b/docs/contributing/README.md new file mode 100644 index 000000000..10daecb10 --- /dev/null +++ b/docs/contributing/README.md @@ -0,0 +1,161 @@ +# Contributing to vLLM Spyre + +Thank you for your interest in contributing to the Spyre plugin for vLLM! There are several ways you can contribute: + +- Identify and report any issues or bugs. +- Suggest or implement new features. +- Improve documentation or contribute a how-to guide. 
+ +## Issues + +If you encounter a bug or have a feature request, please search [existing issues](https://github.com/vllm-project/vllm-spyre/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [create a new issue](https://github.com/vllm-project/vllm-spyre/issues/new/choose), providing as much relevant information as possible. + +You can also reach out for support in the `#sig-spyre` channel in the [vLLM Slack](https://inviter.co/vllm-slack) workspace. + +## Developing + +### Building the docs with MkDocs + +#### Install MkDocs and Plugins + +Install MkDocs along with the [plugins](https://github.com/vllm-project/vllm-spyre/blob/main/mkdocs.yaml) used in the vLLM Spyre documentation. + +```bash +pip install -r docs/requirements-docs.txt +``` + +!!! note + Ensure that your Python version is compatible with the plugins (e.g., `mkdocs-awesome-nav` requires Python 3.10+) + +#### Start the Development Server + +MkDocs comes with a built-in dev-server that lets you preview your documentation as you work on it. + +Make sure you're in the same directory as the `mkdocs.yml` configuration file in the `vllm-spyre` repository, and then start the server by running the `mkdocs serve` command: + +```bash +mkdocs serve +``` + +Example output: + +```console +INFO - Documentation built in 106.83 seconds +INFO - [22:02:02] Watching paths for changes: 'docs', 'mkdocs.yaml' +INFO - [22:02:02] Serving on http://127.0.0.1:8000/ +``` + +#### View in Your Browser + +Open up [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in your browser to see a live preview:. + +#### Learn More + +For additional features and advanced configurations, refer to the official [MkDocs Documentation](https://www.mkdocs.org/). + +## Testing + +### Testing Locally on CPU (No Spyre card) + +!!! tip + `xgrammar` is automatically installed on `x86_64` systems. 
+ +Install `xgrammar` (only for `arm64` systems): + +```sh +uv pip install xgrammar==0.1.19 +``` + +Optionally, download the `JackFram/llama-160m` model: + +```sh +python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')" +``` + +!!! caution + The Hugging Face API download does **not** work on `arm64`. + +By default, the model is saved to `.cache/huggingface/hub/models--JackFram--llama-160m`. + +Then, source the environment variables: + +```sh +source _local_envs_for_test.sh +``` + +Optionally, install development dependencies: + +```sh +uv pip install --group dev +``` + +Now, you can run the tests: + +```sh +python -m pytest -v -x tests -m "v1 and cpu and e2e" +``` + +Here is a list of `pytest` markers you can use to filter them: + +```python +--8<-- "pyproject.toml:test-markers-definition" +``` + +### Testing Continuous Batching + +!!! attention + Continuous batching currently requires the custom installation described below until the FMS custom branch is merged to main. + +After completing the setup steps above, install custom FMS branch to enable support for continuous batching: + +```sh +uv pip install git+https://github.com/foundation-model-stack/foundation-model-stack.git@paged_attn_mock --force-reinstall +``` + +Then, run the continuous batching tests: + +```sh +python -m pytest -v -x tests/e2e -m cb +``` + +## Pull Requests + +### Linting + +When submitting a PR, please make sure your code passes all linting checks. You can install the linting requirements using either `uv` or `pip`. + +Using `uv`: + +```sh +uv sync --frozen --group lint --active --inexact +``` + +Using `pip`: + +```sh +uv pip compile --group lint > requirements-lint.txt +pip install -r requirements-lint.txt +``` + +After installing the requirements, run the formatting script: + +```sh +bash format.sh +``` + +Then, make sure to commit any changes made by the formatter: + +```sh +git add . 
+git commit -s -m "Apply linting and formatting" +``` + +### DCO and Signed-off-by + +When contributing, you must agree to the [DCO](https://github.com/vllm-project/vllm-spyre/blob/main/DCO).Commits must include a `Signed-off-by:` header which certifies agreement with the terms of the DCO. + +Using `-s` with `git commit` will automatically add this header. + +## License + +See . diff --git a/docs/source/deploying/docker.md b/docs/deploying/docker.md similarity index 75% rename from docs/source/deploying/docker.md rename to docs/deploying/docker.md index 5cabf990a..023eda927 100644 --- a/docs/source/deploying/docker.md +++ b/docs/deploying/docker.md @@ -8,17 +8,15 @@ TODO: Add section on RHOAI officially supported images, once they exist Base images containing the driver stack for IBM Spyre accelerators are available from the [ibm-aiu](https://quay.io/repository/ibm-aiu/base?tab=tags) organization on Quay. This includes the `torch_sendnn` package, which is required for using torch with Spyre cards. -:::{attention} -These images contain an install of the `torch` package. The specific version installed is guaranteed to be compatible with `torch_sendnn`. Overwriting this install with a different version of `torch` may cause issues. -::: +!!! attention + These images contain an install of the `torch` package. The specific version installed is guaranteed to be compatible with `torch_sendnn`. Overwriting this install with a different version of `torch` may cause issues. ## Using community built images -Community maintained images are also [available on quay](https://quay.io/repository/ibm-aiu/vllm-spyre?tab=tags), the latest x86 build is `quay.io/ibm-aiu/vllm-spyre:latest.amd64`. +Community maintained images are also [available on Quay](https://quay.io/repository/ibm-aiu/vllm-spyre?tab=tags), the latest x86 build is `quay.io/ibm-aiu/vllm-spyre:latest.amd64`. -:::{caution} -These images are provided as a reference and come with no support guarantees. -::: +!!! 
caution + These images are provided as a reference and come with no support guarantees. ## Building vLLM Spyre's Docker Image from Source @@ -28,9 +26,8 @@ You can build and run vLLM Spyre from source via the provided -1. Create PVCs and secrets for vLLM. These are all optional. +1. (Optional) Create PVCs and secrets for vLLM. ```yaml apiVersion: v1 diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md new file mode 100644 index 000000000..a1b0ec60b --- /dev/null +++ b/docs/getting_started/installation.md @@ -0,0 +1,47 @@ +# Installation + +We use the [uv](https://docs.astral.sh/uv/) package manager to manage the +installation of the plugin and its dependencies. `uv` provides advanced +dependency resolution which is required to properly install dependencies like +`vllm` without overwriting critical dependencies like `torch`. + +First, clone the `vllm-spyre` repo: + +```sh +git clone https://github.com/vllm-project/vllm-spyre.git +cd vllm-spyre +``` + +Then, install `uv`: + +```sh +pip install uv +``` + +Now, create and activate a new [venv](https://docs.astral.sh/uv/pip/environments/): + +```sh +uv venv --python 3.12 --seed .venv +source .venv/bin/activate +``` + +To install `vllm-spyre` locally with development dependencies, use the following command: + +```sh +uv sync --frozen --active --inexact +``` + +To include optional linting dependencies, include `--group lint`: + +```sh +uv sync --frozen --active --inexact --group lint +``` + +!!! tip + The `dev` group (i.e. `--group dev`) is enabled by default. + +Finally, the `torch` is needed to run examples and tests. 
If it is not already installed, install it using `pip`: + +```sh +pip install torch==2.7.0 +``` diff --git a/docs/mkdocs/hooks/generate_examples.py b/docs/mkdocs/hooks/generate_examples.py new file mode 100644 index 000000000..34ba0bb7e --- /dev/null +++ b/docs/mkdocs/hooks/generate_examples.py @@ -0,0 +1,163 @@ +""" Sourced from https://github.com/vllm-project/vllm/blob/main/docs/mkdocs/hooks/generate_examples.py """ # noqa: E501 +# SPDX-License-Identifier: Apache-2.0 +import itertools +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal + +import regex as re + +ROOT_DIR = Path(__file__).parent.parent.parent.parent +ROOT_DIR_RELATIVE = '../../../../..' +EXAMPLE_DIR = ROOT_DIR / "examples" +EXAMPLE_DOC_DIR = ROOT_DIR / "docs/examples" +print(ROOT_DIR.resolve()) +print(EXAMPLE_DIR.resolve()) +print(EXAMPLE_DOC_DIR.resolve()) + + +def fix_case(text: str) -> str: + subs = { + "api": "API", + "cli": "CLI", + "cpu": "CPU", + "llm": "LLM", + "mae": "MAE", + "tpu": "TPU", + "aqlm": "AQLM", + "gguf": "GGUF", + "lora": "LoRA", + "rlhf": "RLHF", + "vllm": "vLLM", + "openai": "OpenAI", + "lmcache": "LMCache", + "multilora": "MultiLoRA", + "mlpspeculator": "MLPSpeculator", + r"fp\d+": lambda x: x.group(0).upper(), # e.g. fp16, fp32 + r"int\d+": lambda x: x.group(0).upper(), # e.g. int8, int16 + } + for pattern, repl in subs.items(): + text = re.sub(rf'\b{pattern}\b', repl, text, flags=re.IGNORECASE) + return text + + +@dataclass +class Example: + """ + Example class for generating documentation content from a given path. + + Attributes: + path (Path): The path to the main directory or file. + category (str): The category of the document. + main_file (Path): The main file in the directory. + other_files (list[Path]): list of other files in the directory. + title (str): The title of the document. + + Methods: + __post_init__(): Initializes the main_file, other_files, and title attributes. 
+ determine_main_file() -> Path: Determines the main file in the given path. + determine_other_files() -> list[Path]: Determines other files in the directory excluding the main file. + determine_title() -> str: Determines the title of the document. + generate() -> str: Generates the documentation content. + """ # noqa: E501 + path: Path + category: str = None + main_file: Path = field(init=False) + other_files: list[Path] = field(init=False) + title: str = field(init=False) + + def __post_init__(self): + self.main_file = self.determine_main_file() + self.other_files = self.determine_other_files() + self.title = self.determine_title() + + def determine_main_file(self) -> Path: + """ + Determines the main file in the given path. + If the path is a file, it returns the path itself. Otherwise, it searches + for Markdown files (*.md) in the directory and returns the first one found. + Returns: + Path: The main file path, either the original path if it's a file or the first + Markdown file found in the directory. + Raises: + IndexError: If no Markdown files are found in the directory. + """ # noqa: E501 + return self.path if self.path.is_file() else list( + self.path.glob("*.md")).pop() + + def determine_other_files(self) -> list[Path]: + """ + Determine other files in the directory excluding the main file. + + This method checks if the given path is a file. If it is, it returns an empty list. + Otherwise, it recursively searches through the directory and returns a list of all + files that are not the main file. + + Returns: + list[Path]: A list of Path objects representing the other files in the directory. 
+ """ # noqa: E501 + if self.path.is_file(): + return [] + is_other_file = lambda file: file.is_file() and file != self.main_file + return [file for file in self.path.rglob("*") if is_other_file(file)] + + def determine_title(self) -> str: + return fix_case(self.path.stem.replace("_", " ").title()) + + def generate(self) -> str: + content = f"---\ntitle: {self.title}\n---\n\n" + content += f"Source .\n\n" + + # Use long code fence to avoid issues with + # included files containing code fences too + code_fence = "``````" + is_code = self.main_file.suffix != ".md" + if is_code: + content += f"{code_fence}{self.main_file.suffix[1:]}\n" + content += f'--8<-- "{self.main_file}"\n' + if is_code: + content += f"{code_fence}\n" + content += "\n" + + if not self.other_files: + return content + + content += "## Example materials\n\n" + for file in sorted(self.other_files): + content += f'??? abstract "{file.relative_to(self.path)}"\n' + if file.suffix != ".md": + content += f" {code_fence}{file.suffix[1:]}\n" + content += f' --8<-- "{file}"\n' + if file.suffix != ".md": + content += f" {code_fence}\n" + + return content + + +def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): + # Create the EXAMPLE_DOC_DIR if it doesn't exist + if not EXAMPLE_DOC_DIR.exists(): + EXAMPLE_DOC_DIR.mkdir(parents=True) + + categories = sorted(p for p in EXAMPLE_DIR.iterdir() if p.is_dir()) + + examples = [] + glob_patterns = ["*.py", "*.md", "*.sh"] + # Find categorised examples + for category in categories: + globs = [category.glob(pattern) for pattern in glob_patterns] + for path in itertools.chain(*globs): + examples.append(Example(path, category.stem)) + # Find examples in subdirectories + for path in category.glob("*/*.md"): + examples.append(Example(path.parent, category.stem)) + + # Generate the example documentation + for example in sorted(examples, key=lambda e: e.path.stem): + example_name = f"{example.path.stem}.md" + doc_path = EXAMPLE_DOC_DIR / 
example.category / example_name + print(doc_path) + if not doc_path.parent.exists(): + doc_path.parent.mkdir(parents=True) + with open(doc_path, "w+") as f: + f.write(example.generate()) diff --git a/docs/mkdocs/hooks/url_schemes.py b/docs/mkdocs/hooks/url_schemes.py new file mode 100644 index 000000000..4de2efe87 --- /dev/null +++ b/docs/mkdocs/hooks/url_schemes.py @@ -0,0 +1,54 @@ +""" Sourced from https://github.com/vllm-project/vllm/blob/main/docs/mkdocs/hooks/url_schemes.py """ # noqa: E501 + +import regex as re +from mkdocs.config.defaults import MkDocsConfig +from mkdocs.structure.files import Files +from mkdocs.structure.pages import Page + + +def on_page_markdown(markdown: str, *, page: Page, config: MkDocsConfig, + files: Files): + gh_icon = ":octicons-mark-github-16:" + gh_url = "https://github.com" + repo_url = f"{gh_url}/vllm-project/vllm-spyre" + org_url = f"{gh_url}/orgs/vllm-project" + urls = { + "issue": f"{repo_url}/issues", + "pr": f"{repo_url}/pull", + "project": f"{org_url}/projects", + "dir": f"{repo_url}/tree/main", + "file": f"{repo_url}/blob/main", + } + titles = { + "issue": "Issue #", + "pr": "Pull Request #", + "project": "Project #", + "dir": "", + "file": "", + } + + scheme = r"gh-(?P.+?):(?P.+?)(#(?P.+?))?" 
+ inline_link = re.compile(r"\[(?P[^\[]+?)\]\(" + scheme + r"\)") + auto_link = re.compile(f"<{scheme}>") + + def replace_inline_link(match: re.Match) -> str: + url = f'{urls[match.group("type")]}/{match.group("path")}' + if fragment := match.group("fragment"): + url += f"#{fragment}" + + return f'[{gh_icon} {match.group("title")}]({url})' + + def replace_auto_link(match: re.Match) -> str: + type = match.group("type") + path = match.group("path") + title = f"{titles[type]}{path}" + url = f"{urls[type]}/{path}" + if fragment := match.group("fragment"): + url += f"#{fragment}" + + return f"[{gh_icon} {title}]({url})" + + markdown = inline_link.sub(replace_inline_link, markdown) + markdown = auto_link.sub(replace_auto_link, markdown) + + return markdown diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 2d18af238..a4f7a52db 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,9 +1,8 @@ -sphinx -sphinx-argparse -sphinx-book-theme -sphinx-copybutton -sphinx-design -sphinx-togglebutton -myst-parser -msgspec -sphinx-substitution-extensions +mkdocs +mkdocs-api-autonav +mkdocs-material +mkdocstrings-python +mkdocs-gen-files +mkdocs-awesome-nav +python-markdown-math +regex \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 7c7e77bcf..000000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,147 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html -# This file is a part of the vllm-spyre project. -# Adapted from vllm-project/vllm/docs/source/conf.py - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -import os -import sys -from pathlib import Path - -REPO_ROOT = Path(__file__).resolve().parent.parent.parent -sys.path.append(os.path.abspath(REPO_ROOT)) - -# -- Project information ----------------------------------------------------- - -project = 'vllm-spyre' -copyright = '2025, IBM Research' -author = 'IBM Research' - -# The full version, including alpha/beta/rc tags -release = '' - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. - -# Copy from https://github.com/vllm-project/vllm/blob/main/docs/source/conf.py -extensions = [ - "sphinx.ext.napoleon", - "sphinx.ext.intersphinx", - "sphinx_copybutton", - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "myst_parser", - "sphinxarg.ext", - "sphinx_design", - "sphinx_togglebutton", - "sphinx_substitution_extensions", -] - -myst_enable_extensions = ["colon_fence", "substitution"] - -# Change this when cut down release -myst_substitutions = { - # the branch of vllm, used in vllm clone - # - main branch: 'main' - # - vX.Y.Z branch: 'vX.Y.Z' - 'vllm_version': 'v0.8.4', - # the branch of vllm-spyre, used in vllm-spyre clone and image tag - # - main branch: 'main' - # - vX.Y.Z branch: latest vllm-spyre release tag - 'vllm_spyre_version': 'v0.1.0', - # the newest release version of vllm-spyre and matched vLLM, used - # in pip install. - # This value should be updated when cut down release. - 'pip_vllm_spyre_version': "v0.1.0", - 'pip_vllm_version': "0.8.4", -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The language for content autogenerated by Sphinx. 
Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = 'en' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [ - '_build', - 'Thumbs.db', - '.DS_Store', - '.venv', - 'README.md', - 'user_guide/release.template.md', -] - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# TODO: Add html_logo -html_title = project -html_theme = 'sphinx_book_theme' -html_theme_options = { - 'path_to_docs': 'docs/source', - 'repository_url': 'https://github.com/vllm-project/vllm-spyre', - 'use_repository_button': True, - 'use_edit_page_button': True, -} -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-# html_static_path = ['_static'] - -myst_heading_anchors = 2 -myst_url_schemes = { - 'http': None, - 'https': None, - 'mailto': None, - 'ftp': None, - "gh-issue": { - "url": - "https://github.com/vllm-project/vllm-spyre/issues/{{path}}#{{fragment}}", - "title": "Issue #{{path}}", - "classes": ["github"], - }, - "gh-pr": { - "url": - "https://github.com/vllm-project/vllm-spyre/pull/{{path}}#{{fragment}}", - "title": "Pull Request #{{path}}", - "classes": ["github"], - }, - "gh-dir": { - "url": "https://github.com/vllm-project/vllm-spyre/tree/main/{{path}}", - "title": "{{path}}", - "classes": ["github"], - }, - "gh-file": { - "url": "https://github.com/vllm-project/vllm-spyre/blob/main/{{path}}", - "title": "{{path}}", - "classes": ["github"], - }, -} - - -def setup(app): - from docs.source.generate_examples import generate_examples - generate_examples() diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md deleted file mode 100644 index ad108fa77..000000000 --- a/docs/source/contributing/overview.md +++ /dev/null @@ -1,133 +0,0 @@ -# Contributing - -Thank you for your interest in contributing to Spyre support on vLLM! - -There are several ways you can contribute: - -- Identify and report any issues or bugs. -- Suggest or implement new features. -- Improve documentation or contribute a how-to guide. - -## Issues - -If you encounter a bug or have a feature request, please search [existing issues](https://github.com/vllm-project/vllm-spyre/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [create a new issue](https://github.com/vllm-project/vllm-spyre/issues/new/choose), providing as much relevant information as possible. - -You can also reach out for support in the `#sig-spyre` channel in the [vLLM Slack](https://inviter.co/vllm-slack) workspace. - -## Testing - -### Testing Locally on CPU (No Spyre card) - -1. 
Install `xgrammar` (only for `arm64` systems): - - :::{tip} - `xgrammar` is automatically installed on `x86_64` systems. - ::: - - ```sh - uv pip install xgrammar==0.1.19 - ``` - -1. (Optional) Download the `JackFram/llama-160m` model: - - ```sh - python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')" - ``` - - :::{caution} - The Hugging Face API download does **not** work on `arm64`. - ::: - - By default, the model is saved to: - - ```sh - .cache/huggingface/hub/models--JackFram--llama-160m - ``` - -1. Source environment variables: - - ```sh - source _local_envs_for_test.sh - ``` - -1. (Optional) Install development dependencies: - - ```sh - uv pip install --group dev - ``` - -1. Run the tests: - - ```sh - python -m pytest -v -x tests -m "v1 and cpu and e2e" - ``` - - Here are a list of `pytest` markers you can use to filter tests: - - :::{literalinclude} ../../../pyproject.toml - :start-after: begin-test-markers-definition - :end-before: end-test-markers-definition - :language: python - ::: - -### Testing Continuous Batching - -:::{attention} -Continuous batching currently requires the custom installation described below until the FMS custom branch is merged to main. -::: - -After completing the setup steps above: - -1. Install custom FMS branch to enable support for continuous batching: - - ```sh - uv pip install git+https://github.com/foundation-model-stack/foundation-model-stack.git@paged_attn_mock --force-reinstall - ``` - -2. Run the continuous batching tests: - - ```sh - python -m pytest -v -x tests/e2e -m cb - ``` - -## Pull Requests - -### Linting - -When submitting a PR, please make sure your code passes all linting checks. You can install the linting requirements using either `uv` or `pip`. 
- -Using `uv`: - -```bash -uv sync --frozen --group lint --active --inexact -``` - -Using `pip`: - -```bash -uv pip compile --group lint > requirements-lint.txt -pip install -r requirements-lint.txt -``` - -After installing the requirements, run the formatting script: - -```bash -bash format.sh -``` - -Then, make sure to commit any changes made by the formatter: - -```bash -git add . -git commit -s -m "Apply linting and formatting" -``` - -### DCO and Signed-off-by - -When contributing, you must agree to the [DCO](https://github.com/vllm-project/vllm-spyre/blob/main/DCO).Commits must include a `Signed-off-by:` header which certifies agreement with the terms of the DCO. - -Using `-s` with `git commit` will automatically add this header. - -## License - -See <gh-file:LICENSE>. diff --git a/docs/source/generate_examples.py b/docs/source/generate_examples.py deleted file mode 100644 index 08ea53edb..000000000 --- a/docs/source/generate_examples.py +++ /dev/null @@ -1,244 +0,0 @@ -""" Sourced from https://github.com/vllm-project/vllm/blob/main/docs/source/generate_examples.py """ # noqa: E501 - -import itertools -import re -from dataclasses import dataclass, field -from pathlib import Path - -ROOT_DIR = Path(__file__).parent.parent.parent.resolve() -ROOT_DIR_RELATIVE = '../../../..' -EXAMPLE_DIR = ROOT_DIR / "examples" -EXAMPLE_DOC_DIR = ROOT_DIR / "docs/source/getting_started/examples" - - -def fix_case(text: str) -> str: - subs = { - "api": "API", - "cli": "CLI", - "cpu": "CPU", - "llm": "LLM", - "mae": "MAE", - "tpu": "TPU", - "aqlm": "AQLM", - "gguf": "GGUF", - "lora": "LoRA", - "rlhf": "RLHF", - "vllm": "vLLM", - "openai": "OpenAI", - "lmcache": "LMCache", - "multilora": "MultiLoRA", - "mlpspeculator": "MLPSpeculator", - r"fp\d+": lambda x: x.group(0).upper(), # e.g. fp16, fp32 - r"int\d+": lambda x: x.group(0).upper(), # e.g. 
# --- tail of fix_case() (signature and the rest of the `subs` mapping are
# above this chunk): applies each pattern -> replacement over whole words,
# case-insensitively, and returns the fixed text. ---
int8, int16
    }
    for pattern, repl in subs.items():
        # \b anchors keep e.g. "api" in "capital" untouched; IGNORECASE lets
        # one lowercase key cover all capitalisations in titles.
        text = re.sub(rf'\b{pattern}\b', repl, text, flags=re.IGNORECASE)
    return text


@dataclass
class Index:
    """
    Index class to generate a structured document index.

    Attributes:
        path (Path): The path to save the index file to.
        title (str): The title of the index.
        description (str): A brief description of the index.
        caption (str): An optional caption for the table of contents.
        maxdepth (int): The maximum depth of the table of contents. Defaults to 1.
        documents (list[str]): A list of document paths to include in the index. Defaults to an empty list.

    Methods:
        generate() -> str:
            Generates the index content as a string in the specified format.
    """  # noqa: E501
    path: Path
    title: str
    description: str
    caption: str
    maxdepth: int = 1
    # Mutable default handled correctly via default_factory (never a bare []).
    documents: list[str] = field(default_factory=list)

    def generate(self) -> str:
        """Render the index as a Markdown page with a MyST ``{toctree}`` directive.

        The ``:::`` fences are MyST (myst-parser) colon-fence syntax for Sphinx
        directives; ``documents`` entries become the toctree's child pages.
        """
        content = f"# {self.title}\n\n{self.description}\n\n"
        content += ":::{toctree}\n"
        content += f":caption: {self.caption}\n:maxdepth: {self.maxdepth}\n"
        content += "\n".join(self.documents) + "\n:::\n"
        return content


@dataclass
class Example:
    """
    Example class for generating documentation content from a given path.

    Attributes:
        path (Path): The path to the main directory or file.
        category (str): The category of the document.
        main_file (Path): The main file in the directory.
        other_files (list[Path]): list of other files in the directory.
        title (str): The title of the document.

    Methods:
        __post_init__(): Initializes the main_file, other_files, and title attributes.
        determine_main_file() -> Path: Determines the main file in the given path.
        determine_other_files() -> list[Path]: Determines other files in the directory excluding the main file.
        determine_title() -> str: Determines the title of the document.
        generate() -> str: Generates the documentation content.
    """  # noqa: E501
    path: Path
    # NOTE(review): annotated `str` but defaults to None — effectively
    # Optional[str]. None marks an uncategorised example (see
    # generate_examples(), which falls back to the top-level index for it).
    category: str = None
    # Derived in __post_init__, so excluded from the generated __init__.
    main_file: Path = field(init=False)
    other_files: list[Path] = field(init=False)
    title: str = field(init=False)

    def __post_init__(self):
        # Order matters: other_files excludes main_file, so compute it first.
        self.main_file = self.determine_main_file()
        self.other_files = self.determine_other_files()
        self.title = self.determine_title()

    def determine_main_file(self) -> Path:
        """
        Determines the main file in the given path.
        If the path is a file, it returns the path itself. Otherwise, it searches
        for Markdown files (*.md) in the directory and returns one of them —
        note that ``list(...).pop()`` takes the LAST element yielded by ``glob``,
        whose order is OS-dependent, so directory examples are expected to
        contain a single ``*.md`` file.
        Returns:
            Path: The main file path, either the original path if it's a file or the
            Markdown file found in the directory.
        Raises:
            IndexError: If no Markdown files are found in the directory.
        """  # noqa: E501
        return self.path if self.path.is_file() else list(
            self.path.glob("*.md")).pop()

    def determine_other_files(self) -> list[Path]:
        """
        Determine other files in the directory excluding the main file.

        This method checks if the given path is a file. If it is, it returns an empty list.
        Otherwise, it recursively searches through the directory and returns a list of all
        files that are not the main file.

        Returns:
            list[Path]: A list of Path objects representing the other files in the directory.
        """  # noqa: E501
        if self.path.is_file():
            return []
        # rglob("*") also yields subdirectories; the is_file() check drops them.
        is_other_file = lambda file: file.is_file() and file != self.main_file
        return [file for file in self.path.rglob("*") if is_other_file(file)]

    def determine_title(self) -> str:
        # "offline_inference" -> "Offline Inference", then fix_case repairs
        # acronym casing broken by .title() (e.g. "Vllm" -> "vLLM").
        return fix_case(self.path.stem.replace("_", " ").title())

    def generate(self) -> str:
        """Render this example as a Markdown doc page (MyST directives).

        Markdown main files are embedded verbatim with ``{include}``; code
        files use ``{literalinclude}`` with a language tag and get a generated
        H1 title (Markdown files are assumed to carry their own title).
        """
        # Convert the path to a relative path from __file__
        make_relative = lambda path: ROOT_DIR_RELATIVE / path.relative_to(
            ROOT_DIR)

        # <gh-file:...> is a custom role resolved elsewhere to a GitHub link.
        content = f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
        include = "include" if self.main_file.suffix == ".md" else \
            "literalinclude"
        if include == "literalinclude":
            content += f"# {self.title}\n\n"
        content += f":::{{{include}}} {make_relative(self.main_file)}\n"
        if include == "literalinclude":
            # Suffix minus the leading dot, e.g. ".py" -> "py".
            content += f":language: {self.main_file.suffix[1:]}\n"
        content += ":::\n\n"

        if not self.other_files:
            return content

        # Supporting files are shown as collapsible dropdown admonitions.
        content += "## Example materials\n\n"
        for file in sorted(self.other_files):
            include = "include" if file.suffix == ".md" else "literalinclude"
            content += f":::{{admonition}} {file.relative_to(self.path)}\n"
            content += ":class: dropdown\n\n"
            content += f":::{{{include}}} {make_relative(file)}\n:::\n"
            content += ":::\n\n"

        return content


def generate_examples():
    """Discover examples under EXAMPLE_DIR and write generated doc pages plus
    per-category and top-level index files into EXAMPLE_DOC_DIR."""
    # Create the EXAMPLE_DOC_DIR if it doesn't exist
    if not EXAMPLE_DOC_DIR.exists():
        EXAMPLE_DOC_DIR.mkdir(parents=True)

    # Create empty indices
    examples_index = Index(
        path=EXAMPLE_DOC_DIR / "examples_index.md",
        title="Examples",
        description=
        "A collection of examples demonstrating usage of vLLM-Spyre.\nAll documented examples are autogenerated using <gh-file:docs/source/generate_examples.py> from examples found in <gh-file:examples>.",  # noqa: E501
        caption="Examples",
        maxdepth=2)
    # Category indices stored in reverse order because they are inserted into
    # examples_index.documents at index 0 in order
    category_indices = {
        "other":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_other_index.md",
            title="Other",
            description=
            "Other examples that don't strongly fit into the online or offline serving categories.",  # noqa: E501
            caption="Examples",
        ),
        "online_serving":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_online_serving_index.md",
            title="Online Serving",
            description=
            "Online serving examples demonstrate how to use vLLM-Spyre in an online setting, where the model is queried for predictions in real-time.",  # noqa: E501
            caption="Examples",
        ),
        "offline_inference":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_offline_inference_index.md",
            title="Offline Inference",
            description=
            "Offline inference examples demonstrate how to use vLLM-Spyre in an offline setting, where the model is queried for predictions in batches.",  # noqa: E501
            caption="Examples",
        ),
    }

    examples = []
    glob_patterns = ["*.py", "*.md", "*.sh"]
    # Find categorised examples
    for category in category_indices:
        category_dir = EXAMPLE_DIR / category
        globs = [category_dir.glob(pattern) for pattern in glob_patterns]
        for path in itertools.chain(*globs):
            examples.append(Example(path, category))
        # Find examples in subdirectories
        for path in category_dir.glob("*/*.md"):
            # A subdirectory with a Markdown file is treated as one example
            # rooted at the directory (Example then picks main/other files).
            examples.append(Example(path.parent, category))
    # Find uncategorised examples
    globs = [EXAMPLE_DIR.glob(pattern) for pattern in glob_patterns]
    for path in itertools.chain(*globs):
        examples.append(Example(path))
    # Find examples in subdirectories
    for path in EXAMPLE_DIR.glob("*/*.md"):
        # Skip categorised examples
        if path.parent.name in category_indices:
            continue
        examples.append(Example(path.parent))

    # Generate the example documentation
    # NOTE(review): output names use only the stem, so two examples sharing a
    # stem across categories would overwrite each other ("w+" truncates) —
    # assumes stems are globally unique; verify against the examples tree.
    for example in sorted(examples, key=lambda e: e.path.stem):
        doc_path = EXAMPLE_DOC_DIR / f"{example.path.stem}.md"
        with open(doc_path, "w+") as f:
            f.write(example.generate())
        # Add the example to the appropriate index
        # (examples with category=None fall back to the top-level index).
        index = category_indices.get(example.category,
                                     examples_index)
        index.documents.append(example.path.stem)

    # Generate the index files
    # Empty categories are skipped entirely: no file written, no toctree entry.
    # insert(0, ...) plus the reverse-ordered dict above yields the intended
    # display order in the top-level index.
    for category_index in category_indices.values():
        if category_index.documents:
            examples_index.documents.insert(0, category_index.path.name)
            with open(category_index.path, "w+") as f:
                f.write(category_index.generate())

    with open(examples_index.path, "w+") as f:
        f.write(examples_index.generate())
diff --git a/docs/source/getting_started/installation.md b/docs/source/getting_started/installation.md
deleted file mode 100644
index a343e6c5a..000000000
--- a/docs/source/getting_started/installation.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Installation
-
-We use the [uv](https://docs.astral.sh/uv/) package manager to manage the
-installation of the plugin and its dependencies. `uv` provides advanced
-dependency resolution which is required to properly install dependencies like
-`vllm` without overwriting critical dependencies like `torch`.
-
-1. Clone vllm-spyre
-
-   ```sh
-   git clone https://github.com/vllm-project/vllm-spyre.git
-   cd vllm-spyre
-   ```
-
-1. Install uv
-
-   ```sh
-   pip install uv
-   ```
-
-1. Create a new env
-
-   ```sh
-   uv venv --python 3.12 --seed .venv
-   ```
-
-1. Activate it
-
-   ```sh
-   source .venv/bin/activate
-   ```
-
-1. Install `vllm-spyre` locally with dev (and optionally lint) dependencies
-
-   ```sh
-   uv sync --frozen --active --inexact
-   ```
-
-   or also with lint:
-
-   ```sh
-   uv sync --frozen --active --inexact --group lint
-   ```
-
-   :::{tip}
-   `--group dev` is enabled by default
-   :::
-
-1. (Optional) Install torch through pip
-
-   If you don't have it installed already. Will be needed
-   for running examples or tests.
- - ```sh - pip install torch==2.7.0 - ``` diff --git a/docs/source/getting_started/spyre_overview.md b/docs/source/getting_started/spyre_overview.md deleted file mode 100644 index c82c69e2f..000000000 --- a/docs/source/getting_started/spyre_overview.md +++ /dev/null @@ -1,17 +0,0 @@ -# IBM Spyre Overview - -**IBM Spyre** is the first production-grade Artificial Intelligence Unit (AIU) accelerator born out of the IBM Research AIU family, and is part of a long-term strategy of developing novel architectures and full-stack technology solutions for the emerging space of generative AI. - -Spyre builds on the foundation of IBM’s internal AIU research and delivers a scalable, efficient architecture for accelerating AI in enterprise environments. - -## 🔍 Learn More - -- 📚 [Meet the IBM Artificial Intelligence Unit](https://research.ibm.com/blog/ibm-artificial-intelligence-unit-aiu) -- 📽️ [AI Accelerators: Transforming Scalability & Model Efficiency](https://www.youtube.com/watch?v=KX0qBM-ByAg) -- 🚀 [Spyre Accelerator for IBM Z](https://research.ibm.com/blog/spyre-for-z) - -## See Also - -- <project:./installation.md> -- <project:../user_guide/configuration.md> -- <project:../user_guide/supported_features.md> diff --git a/docs/source/index.md b/docs/source/index.md deleted file mode 100644 index 942d41e9e..000000000 --- a/docs/source/index.md +++ /dev/null @@ -1,44 +0,0 @@ -# Welcome to the vLLM Spyre Plugin - -::: -<p style="text-align:center"> -<script async defer src="https://buttons.github.io/buttons.js"></script> -<a class="github-button" href="https://github.com/vllm-project/vllm-spyre" data-show-count="true" data-size="large" aria-label="Star">Star</a> -<a class="github-button" href="https://github.com/vllm-project/vllm-spyre/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a> -<a class="github-button" href="https://github.com/vllm-project/vllm-spyre/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a> 
-</p> -::: - -The vLLM Spyre plugin (`vllm-spyre`) is a dedicated backend extension that enables seamless integration of IBM Spyre Accelerator with vLLM. It follows the architecture described in [vLLM's Plugin System](https://docs.vllm.ai/en/latest/design/plugin_system.html), making it easy to integrate IBM's advanced AI acceleration into existing vLLM workflows. - -## Documentation - -:::{toctree} -:caption: Getting Started -:maxdepth: 1 -getting_started/spyre_overview -getting_started/installation -getting_started/examples/examples_index -::: - -:::{toctree} -:caption: User Guide -:maxdepth: 1 -user_guide/configuration -user_guide/env_vars -user_guide/supported_features -user_guide/supported_models -::: - -:::{toctree} -:caption: Deploying -:maxdepth: 1 -deploying/docker -deploying/k8s -::: - -:::{toctree} -:caption: Developer Guide -:maxdepth: 2 -contributing/overview -::: diff --git a/docs/source/user_guide/env_vars.md b/docs/source/user_guide/env_vars.md deleted file mode 100644 index d32f0ebf7..000000000 --- a/docs/source/user_guide/env_vars.md +++ /dev/null @@ -1,9 +0,0 @@ -# Environment Variables - -vLLM Spyre uses the following environment variables to configure the system: - -:::{literalinclude} ../../../vllm_spyre/envs.py -:end-before: end-env-vars-definition -:language: python -:start-after: begin-env-vars-definition -::: diff --git a/docs/source/user_guide/configuration.md b/docs/user_guide/configuration.md similarity index 81% rename from docs/source/user_guide/configuration.md rename to docs/user_guide/configuration.md index e6c3193cc..1dc1372be 100644 --- a/docs/source/user_guide/configuration.md +++ b/docs/user_guide/configuration.md @@ -1,6 +1,6 @@ # Configuration -For a complete list of configuration options, see <project:./env_vars.md>. +For a complete list of configuration options, see [Environment Variables](env_vars.md). 
## Backend Selection @@ -26,9 +26,8 @@ With static batching, graphs are pre-compiled for the configured batch shapes an Static batching mode is enabled by default, and can be explicitly enabled by setting `VLLM_USE_CB=0`. -:::{caution} -There are no up-front checks that the compiled graphs will fit into the available memory on the Spyre cards. If the graphs are too large for the available memory, vllm will crash during model warmup. -::: +!!! caution + There are no up-front checks that the compiled graphs will fit into the available memory on the Spyre cards. If the graphs are too large for the available memory, vllm will crash during model warmup. The batch shapes are configured with the `VLLM_SPYRE_WARMUP_*` environment variables. For example, to warm up two graph shapes for one single large request and four smaller requests you could use: @@ -40,9 +39,8 @@ export VLLM_SPYRE_WARMUP_NEW_TOKENS=1024,256 ### Continuous Batching -:::{attention} -Continuous batching is not currently supported on IBM Spyre Accelerators. A CPU-only implementation is available by setting `VLLM_SPYRE_DYNAMO_BACKEND=eager`. Continuous batching can be enabled with `VLLM_USE_CB=1`. -::: +!!! attention + Continuous batching is not currently supported on IBM Spyre Accelerators. A CPU-only implementation is available by setting `VLLM_SPYRE_DYNAMO_BACKEND=eager`. Continuous batching can be enabled with `VLLM_USE_CB=1`. Continuous batching works much more like other accelerator implementations on vLLM. Requests can be continually appended to a running batch, and requests that finish generating can be evicted from the batch to make room for more requests. Neither chunked prefill nor prefix caching are currently supported though, so when a request is added to the running batch it must first be paused for a full prefill of the incoming prompt. 
diff --git a/docs/user_guide/env_vars.md b/docs/user_guide/env_vars.md new file mode 100644 index 000000000..8eb05f7d8 --- /dev/null +++ b/docs/user_guide/env_vars.md @@ -0,0 +1,7 @@ +# Environment Variables + +vLLM Spyre uses the following environment variables to configure the system: + +```python +--8<-- "vllm_spyre/envs.py:env-vars-definition" +``` diff --git a/docs/source/user_guide/supported_features.md b/docs/user_guide/supported_features.md similarity index 100% rename from docs/source/user_guide/supported_features.md rename to docs/user_guide/supported_features.md diff --git a/docs/source/user_guide/supported_models.md b/docs/user_guide/supported_models.md similarity index 100% rename from docs/source/user_guide/supported_models.md rename to docs/user_guide/supported_models.md diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 000000000..d2e093096 --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,109 @@ +site_name: vLLM Spyre Plugin +site_url: https://vllm-spyre.readthedocs.io/ +repo_url: https://github.com/vllm-project/vllm-spyre +exclude_docs: | + *.inc.md + *.template.md +theme: + name: material + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + toggle: + icon: material/brightness-auto + name: Switch to light mode + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: white + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + toggle: + icon: material/brightness-2 + name: Switch to system preference + features: + - content.code.copy + - content.tabs.link + - navigation.tracking + - navigation.tabs + - navigation.sections + - navigation.prune + - navigation.top + - search.highlight + - search.share + - toc.follow + +hooks: + - docs/mkdocs/hooks/generate_examples.py + - docs/mkdocs/hooks/url_schemes.py + +plugins: + - meta + - search + - 
autorefs + - awesome-nav + - mkdocstrings: + handlers: + python: + options: + show_symbol_type_heading: true + show_symbol_type_toc: true + filters: [] + summary: + modules: true + show_if_no_docstring: true + show_signature_annotations: true + separate_signature: true + show_overloads: true + signature_crossrefs: true + inventories: + - https://docs.python.org/3/objects.inv + - https://typing-extensions.readthedocs.io/en/latest/objects.inv + - https://docs.aiohttp.org/en/stable/objects.inv + - https://pillow.readthedocs.io/en/stable/objects.inv + - https://numpy.org/doc/stable/objects.inv + - https://pytorch.org/docs/stable/objects.inv + - https://psutil.readthedocs.io/en/stable/objects.inv + +markdown_extensions: + - attr_list + - md_in_html + - admonition + - pymdownx.details + # For content tabs + - pymdownx.superfences + - pymdownx.tabbed: + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + alternate_style: true + # For code highlighting + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + # For emoji and icons + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + # For in page [TOC] (not sidebar) + - toc: + permalink: true + # For math rendering + - mdx_math: + enable_dollar_delimiter: true + +extra_javascript: + - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML + +# Makes the url format end in .html rather than act as a dir +# So index.md generates as index.html and is available under URL /index.html +# https://www.mkdocs.org/user-guide/configuration/#use_directory_urls +use_directory_urls: false diff --git a/pyproject.toml b/pyproject.toml index a3103d720..6c4a8750a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ skip_gitignore = true [tool.pytest.ini_options] pythonpath = ["."] -# 
begin-test-markers-definition +# --8<-- [start:test-markers-definition] markers = [ "skip_global_cleanup", "e2e: Tests using end-to-end engine spin-up", @@ -133,7 +133,7 @@ markers = [ "utils: Tests for utility functions", "worker: Tests for worker logic", ] -# end-test-markers-definition +# --8<-- [end:test-markers-definition] [tool.pymarkdown] plugins.md013.enabled = false # line-length diff --git a/vllm_spyre/envs.py b/vllm_spyre/envs.py index 2c51d7e5e..1ec0d3813 100644 --- a/vllm_spyre/envs.py +++ b/vllm_spyre/envs.py @@ -11,8 +11,7 @@ VLLM_SPYRE_PERF_METRIC_LOGGING_ENABLED: int = 0 VLLM_SPYRE_PERF_METRIC_LOGGING_DIR: str = "/tmp" -# begin-env-vars-definition - +# --8<-- [start:env-vars-definition] environment_variables: dict[str, Callable[[], Any]] = { # Defines the prompt lengths the Spyre accelerator should be prepared # for, formatted as comma separated list. Only applicable in static batching @@ -70,8 +69,7 @@ "VLLM_SPYRE_PERF_METRIC_LOGGING_DIR": lambda: os.getenv("VLLM_SPYRE_PERF_METRIC_LOGGING_DIR", "/tmp"), } - -# end-env-vars-definition +# --8<-- [end:env-vars-definition] def __getattr__(name: str):