diff --git a/.github/workflows/functionality-helm-chart.yml b/.github/workflows/functionality-helm-chart.yml
index b138a092..74962056 100644
--- a/.github/workflows/functionality-helm-chart.yml
+++ b/.github/workflows/functionality-helm-chart.yml
@@ -28,6 +28,8 @@ jobs:
       - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
       - run: echo "🖥️ The workflow is now ready to test your code on the runner."
       - name: Deploy via helm charts
+        env:
+          DOCKER_BUILDKIT: 1
         run: |
           cd ${{ github.workspace }}
           sudo docker build -t localhost:5000/git-act-router -f docker/Dockerfile .
diff --git a/.gitignore b/.gitignore
index 72c38229..c4d9a7a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,3 +94,6 @@ perf-test.py
 
 values-*.yaml
 helm/examples
+
+# version files
+src/vllm_router/_version.py
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 4119860d..cfdb6c6e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,17 +1,22 @@
-# Stage 1: Build
-FROM python:3.10-slim as builder
+FROM python:3.10-slim
 
 WORKDIR /app
 
-# Copy only the setup.py first to leverage Docker layer caching
+# hadolint ignore=DL3008
+RUN --mount=type=cache,target=/var/lib/apt --mount=type=cache,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy setup.py and the git metadata first (leverage Docker layer caching)
 COPY setup.py .
+COPY .git/ .git/
 
 # Copy the rest of the application code
 COPY src/ src/
 
-# Install dependencies (no cache to reduce size)
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install --no-cache-dir --upgrade pip && \
+# Install dependencies (no pip cache, to keep the image small)
+RUN pip install --upgrade --no-cache-dir pip setuptools_scm && \
     pip install --no-cache-dir .
 
 # Set the entrypoint
diff --git a/pyproject.toml b/pyproject.toml
index 5d7bf33d..3356c340 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,9 @@
+[build-system]
+requires = ["setuptools>=68", "setuptools_scm[toml]>=8.0"]
+
+[tool.setuptools_scm]
+# Generated at build time; the file is gitignored and imported by vllm_router.version.
+version_file = "src/vllm_router/_version.py"
+
 [tool.isort]
 profile = "black"
diff --git a/setup.py b/setup.py
index 5d54a27e..cfd29228 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,8 @@
 
 setup(
     name="vllm-router",
-    version="0.1.0",
+    use_scm_version=True,
+    setup_requires=["setuptools_scm"],
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     # Should be the same as src/router/requirements.txt
diff --git a/src/vllm_router/router.py b/src/vllm_router/router.py
index ea209383..1a796d7a 100644
--- a/src/vllm_router/router.py
+++ b/src/vllm_router/router.py
@@ -27,12 +27,11 @@
     ServiceDiscoveryType,
 )
 from vllm_router.utils import set_ulimit, validate_url
+from vllm_router.version import __version__
 
 httpx_client_wrapper = HTTPXClientWrapper()
 logger = logging.getLogger("uvicorn")
 
-STACK_VERSION = "0.0.1"
-
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
@@ -455,7 +454,8 @@ async def route_completition(request: Request):
 
 @app.get("/version")
 async def show_version():
-    return JSONResponse(content={"version": STACK_VERSION})
+    ver = {"version": __version__}  # "dev" if _version could not be imported
+    return JSONResponse(content=ver)
 
 
 @app.get("/v1/models")
@@ -694,6 +694,15 @@ def parse_args():
         default=10,
         help="The interval in seconds to log statistics.",
     )
+
+    # Add --version argument
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"%(prog)s {__version__}",
+        help="Show version and exit",
+    )
+
     args = parser.parse_args()
     validate_args(args)
     return args
diff --git a/src/vllm_router/version.py b/src/vllm_router/version.py
new file mode 100644
index 00000000..ff964d07
--- /dev/null
+++ b/src/vllm_router/version.py
@@ -0,0 +1,14 @@
+# Version of the vllm_router package.
+#
+# `_version.py` is generated by setuptools_scm at build time (see pyproject.toml)
+# and is gitignored, so it is missing in a plain source checkout.
+# In that case a warning is emitted and a placeholder version is exported instead.
+try:
+    from ._version import __version__, __version_tuple__
+except Exception as e:
+    import warnings
+
+    warnings.warn(f"Failed to import _version:\n{e}", RuntimeWarning, stacklevel=2)
+
+    __version__ = "dev"
+    __version_tuple__ = (0, 0, __version__)