diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..c309c2f
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,37 @@
+<!--
+Pull Request: contribute a change to XMem.
+Fill in each section below; the HTML comments are hints and are not rendered.
+-->
+
+## Summary
+<!-- What does this PR do? (1-3 sentences) -->
+
+
+## Motivation / Problem
+<!-- Why is this change needed? Link to the relevant issue. -->
+
+Closes #<!-- issue number -->
+
+## Changes
+<!-- Bullet-point list of what you changed -->
+- 
+
+## Testing
+<!-- How did you verify this works? -->
+- [ ] Unit tests added / updated (`pytest tests/unit`)
+- [ ] Integration tests pass (`pytest tests/integration`)
+- [ ] Tested manually — steps below:
+
+```
+# command to reproduce
+```
+
+## Screenshots / recordings (if UI change)
+<!-- Drag & drop a screenshot or screen recording -->
+
+## Checklist
+- [ ] My PR title follows [Conventional Commits](https://www.conventionalcommits.org/) (`feat(scope): description`)
+- [ ] I ran `ruff check .` and `black --check .` locally with no errors
+- [ ] I updated `CHANGELOG.md` if this is a user-visible change
+- [ ] I ran `uv lock` if I modified `pyproject.toml`
+- [ ] Security-sensitive files modified? Pinged `@ishaanxgupta` or `@ved015`
diff --git a/.github/workflows/api-schema-diff.yml b/.github/workflows/api-schema-diff.yml
new file mode 100644
index 0000000..f4475b2
--- /dev/null
+++ b/.github/workflows/api-schema-diff.yml
@@ -0,0 +1,162 @@
+# API diff check — detect breaking changes in OpenAPI schema on PRs.
+# Compares the OpenAPI spec from the PR branch against `develop` and
+# posts a diff comment so reviewers can see exactly what API surface changed.
+
+name: API Schema Diff
+
+on:
+  pull_request:
+    branches: [develop, main]
+    paths:
+      - "src/api/**"
+      - "src/schemas/**"
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: api-diff-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  diff:
+    name: Detect API breaking changes
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          pip install -e ".[dev]"
+
+      - name: Generate OpenAPI spec (PR branch)
+        run: |
+          python -c "
+          import json, os
+          os.environ.setdefault('API_KEYS', '[\"test\"]')
+          os.environ.setdefault('JWT_SECRET_KEY', 'test')
+          os.environ.setdefault('PINECONE_API_KEY', 'test')
+          os.environ.setdefault('PINECONE_INDEX_NAME', 'test')
+          os.environ.setdefault('NEO4J_PASSWORD', 'test')
+          os.environ.setdefault('GEMINI_API_KEY', 'test')
+          os.environ.setdefault('MONGODB_URI', 'mongodb://127.0.0.1:1')
+          os.environ.setdefault('ENABLE_ANALYTICS', 'false')
+          os.environ.setdefault('ENABLE_PROMETHEUS', 'false')
+          from src.api.app import create_app
+          app = create_app()
+          spec = app.openapi()
+          with open('openapi-pr.json', 'w') as f:
+              json.dump(spec, f, indent=2)
+          " || echo '{}' > openapi-pr.json
+
+      - name: Generate OpenAPI spec (base branch)
+        run: |
+          git stash || true
+          git checkout ${{ github.event.pull_request.base.ref }}
+          python -c "
+          import json, os
+          os.environ.setdefault('API_KEYS', '[\"test\"]')
+          os.environ.setdefault('JWT_SECRET_KEY', 'test')
+          os.environ.setdefault('PINECONE_API_KEY', 'test')
+          os.environ.setdefault('PINECONE_INDEX_NAME', 'test')
+          os.environ.setdefault('NEO4J_PASSWORD', 'test')
+          os.environ.setdefault('GEMINI_API_KEY', 'test')
+          os.environ.setdefault('MONGODB_URI', 'mongodb://127.0.0.1:1')
+          os.environ.setdefault('ENABLE_ANALYTICS', 'false')
+          os.environ.setdefault('ENABLE_PROMETHEUS', 'false')
+          from src.api.app import create_app
+          app = create_app()
+          spec = app.openapi()
+          with open('openapi-base.json', 'w') as f:
+              json.dump(spec, f, indent=2)
+          " || echo '{}' > openapi-base.json
+          git checkout -
+
+      - name: Diff OpenAPI specs
+        id: diff
+        run: |
+          pip install deepdiff
+          python -c "
+          import json, sys
+          from deepdiff import DeepDiff
+
+          with open('openapi-base.json') as f:
+              base = json.load(f)
+          with open('openapi-pr.json') as f:
+              pr = json.load(f)
+
+          diff = DeepDiff(base, pr, ignore_order=True)
+
+          if not diff:
+              print('NO_CHANGES')
+              sys.exit(0)
+
+          # Classify changes. DeepDiff reports paths as root['paths']['/route'][...]
+          breaking = []
+          added = []
+          changed = []
+
+          removed = diff.get('dictionary_item_removed', [])
+          for item in removed:
+              path = str(item)
+              if path.startswith(\"root['paths']\"):
+                  breaking.append(f'🔴 REMOVED: {path}')
+
+          new_items = diff.get('dictionary_item_added', [])
+          for item in new_items:
+              path = str(item)
+              if path.startswith(\"root['paths']\"):
+                  added.append(f'🟢 ADDED: {path}')
+
+          values_changed = diff.get('values_changed', {})
+          for path, change in values_changed.items():
+              changed.append(f'🟡 CHANGED: {path}')
+
+          print('---REPORT---')
+          if breaking:
+              print('### ⚠️ Breaking Changes')
+              for b in breaking:
+                  print(f'- {b}')
+          if added:
+              print('### ✅ New Endpoints')
+              for a in added:
+                  print(f'- {a}')
+          if changed:
+              print('### 🔄 Modified')
+              for c in changed[:20]:
+                  print(f'- {c}')
+          " > api-diff-report.txt 2>&1 || true
+
+          cat api-diff-report.txt
+
+      - name: Post diff to PR
+        if: always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            let report = '';
+            try {
+              report = fs.readFileSync('api-diff-report.txt', 'utf8');
+            } catch { report = 'Could not generate API diff.'; }
+
+            if (report.includes('NO_CHANGES')) {
+              return;  // No API changes, skip comment
+            }
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: `## 🔍 API Schema Diff\n\n${report}\n\n---\n_Auto-generated by API Schema Diff workflow_`,
+            });
diff --git a/.github/workflows/danger.yml b/.github/workflows/danger.yml
new file mode 100644
index 0000000..e5b68a4
--- /dev/null
+++ b/.github/workflows/danger.yml
@@ -0,0 +1,39 @@
+name: Danger PR Review Bot
+
+# Danger runs on the PR and posts a review comment with potential issues.
+# It does NOT block the PR — it's purely advisory for the reviewer.
+
+on:
+  pull_request:
+    branches: [main, master, develop]
+
+permissions:
+  contents: read          # needed by actions/checkout; unlisted scopes default to none
+  pull-requests: write
+  statuses: write
+concurrency:
+  group: danger-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  danger:
+    name: Danger Review
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Install Danger
+        run: npm install --save-dev danger
+
+      - name: Run Danger
+        run: npx danger ci
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/deploy-aws.yml b/.github/workflows/deploy-aws.yml
index da48ef8..7abb509 100644
--- a/.github/workflows/deploy-aws.yml
+++ b/.github/workflows/deploy-aws.yml
@@ -1,28 +1,18 @@
-# Deploy XMem to an existing AWS EC2 instance when changes land on main.
+# Deploy XMem to the PRODUCTION AWS EC2 instance when changes land on main.
 #
-# A merged PR produces a push to main — that is what triggers this workflow.
-# The domain / DNS record does not move: it still points at the same instance;
-# this job only updates the app on that machine (git pull + Docker restart, etc.).
+# ⚠️  IMPORTANT: This should only fire from the promote-to-production workflow
+# merging develop → main. Direct pushes to main should be blocked by branch
+# protection rules (Settings → Branches → main → Require PR).
 #
 # Required GitHub repository secrets:
-#   EC2_HOST          Public DNS, Elastic IP, or hostname (same host you use with
-#                     `ssh -i your-key.pem ...` today).
-#   EC2_USER          SSH user (e.g. ubuntu, ec2-user, admin).
-#   EC2_SSH_KEY       The SAME private key as your AWS `.pem` file: open the PEM in a
-#                     text editor and paste the entire contents into this secret —
-#                     including the `-----BEGIN ... PRIVATE KEY-----` and `-----END...`
-#                     lines. This is equivalent to `ssh -i key.pem` from your laptop.
-#                     If the key has a passphrase, also add optional secret
-#                     EC2_SSH_KEY_PASSPHRASE (see `passphrase` below).
+#   EC2_HOST          Public DNS, Elastic IP, or hostname
+#   EC2_USER          SSH user (e.g. ubuntu, ec2-user, admin)
+#   EC2_SSH_KEY       Private key (same format as your .pem)
 #
-# Required repository Variable (Settings → Secrets and variables → Actions → Variables):
-#   EC2_DEPLOY_PATH   Absolute path on the server where this repo is cloned
-#                     (e.g. /home/ubuntu/xmem).
-#
-# The instance must be able to `git pull` from GitHub (deploy key or cached credentials)
-# if the repo is private.
+# Required repository secret (the deploy step reads secrets.EC2_DEPLOY_PATH):
+#   EC2_DEPLOY_PATH   Absolute path on the server (e.g. /home/ubuntu/xmem)
 
-name: Deploy to AWS EC2
+name: Deploy to Production
 
 on:
   push:
@@ -36,11 +26,21 @@ concurrency:
 jobs:
   deploy:
     runs-on: ubuntu-latest
-    environment: EC2_HOST
+    environment:
+      name: production
+      url: ${{ vars.PRODUCTION_URL }}
     permissions:
       contents: read
+      deployments: write
 
     steps:
+      - name: Create deployment
+        uses: chrnorm/deployment-action@v2
+        id: deployment
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: production
+
       - name: Deploy over SSH
         uses: appleboy/ssh-action@v1.2.2
         with:
@@ -52,7 +52,41 @@ jobs:
           script: |
             set -euo pipefail
             cd "${{ secrets.EC2_DEPLOY_PATH }}"
+
+            echo "── Pulling latest main ──"
             git fetch origin main
             git checkout main
             git pull origin main
-            sudo systemctl restart xmem
\ No newline at end of file
+
+            echo "── Restarting XMem service ──"
+            sudo systemctl restart xmem
+
+            echo "── Waiting for health endpoint ──"
+            for i in $(seq 1 30); do
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/health || true)
+              if [ "$HTTP_CODE" = "200" ]; then
+                echo "Health check passed (attempt $i)"
+                exit 0
+              fi
+              echo "Health check attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+              sleep 10
+            done
+            echo "FATAL: Health check never returned 200 after 300s"
+            exit 1
+
+      - name: Deployment succeeded
+        if: success()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: success
+          environment-url: ${{ vars.PRODUCTION_URL }}
+
+      - name: Deployment failed
+        if: failure()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: failure
\ No newline at end of file
diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml
new file mode 100644
index 0000000..f2019ad
--- /dev/null
+++ b/.github/workflows/deploy-staging.yml
@@ -0,0 +1,252 @@
+# ═══════════════════════════════════════════════════════════════════════════════
+# Deploy XMem to the STAGING (UAT / canary) EC2 instance.
+#
+# Triggered when:
+#   1. A PR is merged into `develop`  (automatic — the normal workflow)
+#   2. A manual run via workflow_dispatch (for ad-hoc deploys)
+#
+# After a successful deploy the "Smoke Test Staging" workflow runs
+# automatically to verify the app is alive and API contracts work.
+#
+# ┌──────────────┐    merge     ┌──────────┐    auto-deploy    ┌──────────┐
+# │ feature/fix  │ ──────────► │ develop  │ ────────────────► │ STAGING  │
+# └──────────────┘              └──────────┘                   └──────────┘
+#                                                                   │
+#                                                        smoke tests run
+#                                                                   │
+#                                                          ┌────────▼────────┐
+#                                                          │  You review on  │
+#                                                          │  staging URL    │
+#                                                          └────────┬────────┘
+#                                                                   │
+#                                             promote-to-production │ (manual)
+#                                                                   ▼
+#                                                           ┌──────────────┐
+#                                                           │  PRODUCTION  │
+#                                                           │  (main)      │
+#                                                           └──────────────┘
+#
+# ── Required secrets (Settings → Secrets → Actions) ──────────────────────────
+#   STAGING_EC2_HOST        Public DNS / Elastic IP of the staging instance
+#   STAGING_EC2_USER        SSH user (e.g. ubuntu)
+#   STAGING_EC2_SSH_KEY     Private key (same format as your .pem)
+#   STAGING_EC2_DEPLOY_PATH Absolute path on the staging server (/home/ubuntu/xmem)
+#
+#   Staging .env should have:
+#     - A separate Pinecone index (e.g. xmem-staging)
+#     - A separate MongoDB database (e.g. xmem_staging)
+#     - ENABLE_ANALYTICS=false
+#     - XMEM_ENV=staging
+# ═══════════════════════════════════════════════════════════════════════════════
+
+name: Deploy to Staging
+
+on:
+  push:
+    branches: [develop]
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: "Git ref to deploy (branch, tag, or SHA)"
+        required: false
+        default: develop
+
+concurrency:
+  group: deploy-staging
+  cancel-in-progress: true    # latest push wins — old staging deploy is stale
+
+permissions:
+  contents: read
+  deployments: write
+  pull-requests: write    # smoke-test job posts the staging report on the merged PR
+
+jobs:
+  # ── 1. Build & validate Docker image ──────────────────────────────────────
+  validate-build:
+    name: Validate Docker build
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.ref || 'develop' }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image (no push — validation only)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: false
+          tags: xmem:staging-${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  # ── 2. Deploy to staging EC2 ──────────────────────────────────────────────
+  deploy:
+    name: Deploy to staging EC2
+    runs-on: ubuntu-latest
+    needs: validate-build
+    timeout-minutes: 20
+    environment:
+      name: staging
+      url: ${{ vars.STAGING_URL }}   # e.g. https://staging.xmem.bot
+
+    steps:
+      - name: Create GitHub deployment
+        uses: chrnorm/deployment-action@v2
+        id: deployment
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: staging
+
+      - name: Deploy over SSH
+        uses: appleboy/ssh-action@v1.2.2
+        with:
+          host: ${{ secrets.STAGING_EC2_HOST }}
+          username: ${{ secrets.STAGING_EC2_USER }}
+          key: ${{ secrets.STAGING_EC2_SSH_KEY }}
+          passphrase: ${{ secrets.STAGING_EC2_SSH_KEY_PASSPHRASE }}
+          command_timeout: 20m
+          script: |
+            set -euo pipefail
+            cd "${{ secrets.STAGING_EC2_DEPLOY_PATH }}"
+
+            echo "── Pulling latest develop ──"
+            git fetch origin develop
+            git checkout develop
+            git pull origin develop
+
+            echo "── Restarting XMem service ──"
+            sudo systemctl restart xmem-staging
+
+            echo "── Waiting for health endpoint ──"
+            for i in $(seq 1 30); do
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/health || true)
+              if [ "$HTTP_CODE" = "200" ]; then
+                echo "Health check passed (attempt $i)"
+                exit 0
+              fi
+              echo "Health check attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+              sleep 10
+            done
+            echo "FATAL: Health check never returned 200 after 300s"
+            exit 1
+
+      - name: Update deployment status (success)
+        if: success()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: success
+          environment-url: ${{ vars.STAGING_URL }}
+
+      - name: Update deployment status (failure)
+        if: failure()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: failure
+
+  # ── 3. Smoke tests against live staging ───────────────────────────────────
+  smoke-test:
+    name: Smoke test staging
+    runs-on: ubuntu-latest
+    needs: deploy
+    timeout-minutes: 10
+    environment:
+      name: staging
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Wait for staging to stabilize
+        run: sleep 15
+
+      - name: Health check
+        run: |
+          STATUS=$(curl -sf "${{ vars.STAGING_URL }}/health" | jq -r '.data.status')
+          echo "Staging health status: $STATUS"
+          if [ "$STATUS" != "ready" ] && [ "$STATUS" != "loading" ]; then
+            echo "FAIL: staging is not healthy"
+            exit 1
+          fi
+
+      - name: API contract — /health response shape
+        run: |
+          RESPONSE=$(curl -sf "${{ vars.STAGING_URL }}/health")
+          echo "$RESPONSE" | jq -e '.status'
+          echo "$RESPONSE" | jq -e '.data.pipelines_ready'
+
+      - name: API contract — /docs returns OpenAPI
+        run: |
+          curl -sf "${{ vars.STAGING_URL }}/docs" | grep -q "swagger-ui"
+
+      - name: API contract — auth endpoints exist
+        run: |
+          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${{ vars.STAGING_URL }}/auth/google")
+          echo "Auth endpoint returned: $HTTP_CODE"
+          # 307 (redirect) or 405 (method not allowed) are both valid — means route exists
+          if [ "$HTTP_CODE" = "404" ]; then
+            echo "FAIL: auth route missing"
+            exit 1
+          fi
+
+      - name: API contract — memory ingest rejects empty body
+        run: |
+          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+            -X POST "${{ vars.STAGING_URL }}/v1/memory/ingest" \
+            -H "Content-Type: application/json" \
+            -d '{}')
+          echo "Memory ingest with empty body: $HTTP_CODE"
+          # 422 = validation error (expected), 401 = auth required (also valid)
+          if [ "$HTTP_CODE" = "404" ] || [ "$HTTP_CODE" = "500" ]; then
+            echo "FAIL: unexpected response from memory ingest"
+            exit 1
+          fi
+
+      - name: Post result to PR
+        if: always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const status = '${{ job.status }}' === 'success' ? '✅' : '❌';
+            const url = '${{ vars.STAGING_URL }}';
+            const sha = context.sha.substring(0, 7);
+
+            // Find the merged PR targeting develop whose merge commit is exactly this SHA
+            const { data: prs } = await github.rest.pulls.list({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'closed',
+              base: 'develop',
+              sort: 'updated',
+              direction: 'desc',  // API default for sort=updated is asc (oldest first)
+              per_page: 5,
+            });
+            const matchedPr = prs.find(pr => pr.merged_at && pr.merge_commit_sha === context.sha);
+
+            if (matchedPr) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: matchedPr.number,
+                body: [
+                  `## ${status} Staging Deployment Report`,
+                  '',
+                  `| Item | Value |`,
+                  `|------|-------|`,
+                  `| **Commit** | \`${sha}\` |`,
+                  `| **Environment** | [Staging](${url}) |`,
+                  `| **Health** | ${url}/health |`,
+                  `| **Smoke tests** | ${{ job.status }} |`,
+                  '',
+                  status === '✅'
+                    ? '🟢 Ready for review. Test at the staging URL above, then [promote to production](../actions/workflows/promote-to-production.yml).'
+                    : '🔴 Smoke tests failed. Check the [workflow run](../actions/runs/${{ github.run_id }}) for details.',
+                ].join('\n'),
+              });
+            }
diff --git a/.github/workflows/docker-build-check.yml b/.github/workflows/docker-build-check.yml
new file mode 100644
index 0000000..b3253ea
--- /dev/null
+++ b/.github/workflows/docker-build-check.yml
@@ -0,0 +1,58 @@
+# Validate that the Docker image builds successfully on every PR.
+# This catches broken Dockerfiles, missing files, and build-time errors
+# BEFORE code reaches staging.
+
+name: Docker Build Check
+
+on:
+  pull_request:
+    branches: [develop, main]
+    paths:
+      - "Dockerfile"
+      - "docker/**"
+      - "pyproject.toml"
+      - "setup.py"
+      - "src/**"
+      - "server.py"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: docker-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Docker build validation
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build (no push)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: false
+          load: true    # export to the local Docker daemon so the size check can inspect it
+          tags: xmem:pr-${{ github.event.pull_request.number }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Image size check
+        run: |
+          IMAGE="xmem:pr-${{ github.event.pull_request.number }}"
+          SIZE=$(docker image inspect "$IMAGE" --format='{{.Size}}')
+          SIZE_MB=$((SIZE / 1024 / 1024))
+          echo "Image size: ${SIZE_MB}MB"
+
+          MAX_SIZE_MB=2000
+          if [ "$SIZE_MB" -gt "$MAX_SIZE_MB" ]; then
+            echo "⚠️ WARNING: Docker image is ${SIZE_MB}MB (limit: ${MAX_SIZE_MB}MB)"
+            echo "Consider optimizing the Dockerfile (multi-stage builds, .dockerignore, etc.)"
+          fi
diff --git a/.github/workflows/promote-to-production.yml b/.github/workflows/promote-to-production.yml
new file mode 100644
index 0000000..81e9e05
--- /dev/null
+++ b/.github/workflows/promote-to-production.yml
@@ -0,0 +1,200 @@
+# ═══════════════════════════════════════════════════════════════════════════════
+# Promote staging → production
+#
+# This is the MANUAL gate between staging and production.
+# A maintainer clicks "Run workflow" only after reviewing on staging.
+#
+# What happens:
+#   1. Fast-forward merges `develop` into `main`
+#   2. This triggers `deploy-aws.yml` (existing workflow) → production EC2
+#   3. Runs the same smoke tests against production
+#   4. If production smoke tests fail → auto-rollback (reverts the merge)
+#
+# ── Required ─────────────────────────────────────────────────────────────────
+#   - The `production` environment in GitHub must have:
+#     Settings → Environments → production → Required reviewers: @ishaanxgupta, @ved015
+#   - This adds a manual approval popup even if someone accidentally triggers it.
+# ═══════════════════════════════════════════════════════════════════════════════
+
+name: Promote to Production
+
+on:
+  workflow_dispatch:
+    inputs:
+      skip_staging_check:
+        description: "Skip staging smoke test verification (emergency deploys only)"
+        type: boolean
+        default: false
+      confirm_production:
+        description: "Type 'DEPLOY' to confirm production deployment"
+        required: true
+
+concurrency:
+  group: deploy-production
+  cancel-in-progress: false     # never cancel a production deploy mid-flight
+
+permissions:
+  contents: write       # needed to push the merge to main
+  deployments: write
+  issues: write         # rollback job opens an incident issue
+
+jobs:
+  # ── 0. Safety checks ─────────────────────────────────────────────────────
+  gate:
+    name: Pre-flight checks
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+
+    steps:
+      - name: Confirm deploy keyword
+        env:
+          CONFIRM: ${{ github.event.inputs.confirm_production }}   # via env, never inlined into the script
+        run: |
+          [ "$CONFIRM" = "DEPLOY" ] && exit 0
+          echo "❌ You must type 'DEPLOY' to confirm. Got: '$CONFIRM'"; exit 1
+
+      - name: Verify staging is healthy
+        if: ${{ github.event.inputs.skip_staging_check != 'true' }}
+        run: |
+          STATUS=$(curl -sf "${{ vars.STAGING_URL }}/health" | jq -r '.data.status' || echo "unreachable")
+          echo "Staging status: $STATUS"
+          if [ "$STATUS" != "ready" ]; then
+            echo "❌ Staging is not healthy ($STATUS). Fix staging first, or use skip_staging_check for emergencies."
+            exit 1
+          fi
+
+  # ── 1. Merge develop → main ──────────────────────────────────────────────
+  merge:
+    name: Merge develop → main
+    runs-on: ubuntu-latest
+    needs: gate
+    timeout-minutes: 10
+    environment:
+      name: production      # triggers manual approval if configured
+
+    outputs:
+      merge_sha: ${{ steps.merge.outputs.sha }}
+      previous_sha: ${{ steps.before.outputs.sha }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Record pre-merge SHA
+        id: before
+        run: |
+          git checkout main
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Fast-forward merge develop → main
+        id: merge
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          git checkout main
+          git merge origin/develop --ff-only
+
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Push to main
+        run: git push origin main
+
+  # ── 2. Wait for production deploy ────────────────────────────────────────
+  # deploy-aws.yml fires on push to main, but NOT for pushes made with GITHUB_TOKEN:
+  # the merge job must push with a PAT / App token. Here we only wait for the service.
+  verify-production:
+    name: Verify production deployment
+    runs-on: ubuntu-latest
+    needs: merge
+    timeout-minutes: 15
+
+    steps:
+      - name: Wait for deploy-aws to finish
+        run: |
+          echo "Waiting 60s for deploy-aws.yml to pick up the push..."
+          sleep 60
+
+      - name: Production health check (with retry)
+        id: health
+        run: |
+          PROD_URL="${{ vars.PRODUCTION_URL }}"
+          if [ -z "$PROD_URL" ]; then
+            PROD_URL="${{ secrets.EC2_HOST }}"
+            PROD_URL="http://$PROD_URL:8000"
+          fi
+
+          for i in $(seq 1 30); do
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$PROD_URL/health" || true)
+            if [ "$HTTP_CODE" = "200" ]; then
+              echo "✅ Production health check passed (attempt $i)"
+              echo "url=$PROD_URL" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+            echo "Attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+            sleep 10
+          done
+          echo "❌ Production health check failed after 5 minutes"
+          exit 1
+
+      - name: Production smoke — API docs
+        run: |
+          curl -sf "${{ steps.health.outputs.url }}/docs" | grep -q "swagger-ui"
+
+      - name: Production smoke — health response shape
+        run: |
+          RESPONSE=$(curl -sf "${{ steps.health.outputs.url }}/health")
+          echo "$RESPONSE" | jq -e '.data.pipelines_ready'
+
+  # ── 3. Auto-rollback on failure ──────────────────────────────────────────
+  rollback:
+    name: Rollback production
+    runs-on: ubuntu-latest
+    needs: [merge, verify-production]
+    if: failure() && needs.merge.result == 'success'
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Revert main to pre-merge state
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          PREVIOUS_SHA="${{ needs.merge.outputs.previous_sha }}"
+          echo "⚠️ Rolling back main to $PREVIOUS_SHA"
+
+          git checkout main
+          git reset --hard "$PREVIOUS_SHA"
+          git push --force-with-lease origin main
+
+      - name: Notify rollback
+        uses: actions/github-script@v7
+        with:
+          script: |
+            await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title: '🚨 PRODUCTION ROLLBACK — promote-to-production failed',
+              body: [
+                '## Automatic Rollback Executed',
+                '',
+                `Production deployment of \`${{ needs.merge.outputs.merge_sha }}\` failed smoke tests.`,
+                '',
+                `**Main was rolled back to:** \`${{ needs.merge.outputs.previous_sha }}\``,
+                '',
+                `**Action required:**`,
+                '1. Check the [failed workflow run](../actions/runs/${{ github.run_id }})',
+                '2. Fix the issue on `develop`',
+                '3. Re-run promote-to-production',
+                '',
+                'cc @ishaanxgupta @ved015',
+              ].join('\n'),
+              labels: ['bug', 'status/urgent'],
+            });
diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
new file mode 100644
index 0000000..ccfd1f5
--- /dev/null
+++ b/.github/workflows/security-scan.yml
@@ -0,0 +1,71 @@
+name: Security Scan
+
+on:
+  pull_request:
+    branches: [main, master, develop]   # PRs whose base is one of these branches
+  schedule:
+    - cron: "0 2 * * 1"   # also run weekly on Mondays at 02:00 UTC
+
+permissions:
+  contents: read            # the workflow token may read repository contents
+  security-events: write    # needed to upload SARIF to GitHub Security tab
+
+concurrency:
+  group: security-${{ github.ref }}
+  cancel-in-progress: true   # a newer run in the same group cancels the one still running
+
+jobs:
+  bandit:
+    name: Bandit SAST
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install bandit
+        run: pip install "bandit[toml,sarif]"   # the sarif extra provides the formatter used by `-f sarif` below
+
+      - name: Run Bandit
+        run: |
+          bandit -r src/ \
+            -ll \
+            --exclude src/tests \
+            -f sarif \
+            -o bandit-results.sarif
+        continue-on-error: true   # a non-zero bandit exit must not stop the SARIF upload step
+
+      - name: Upload Bandit SARIF
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: bandit-results.sarif
+          category: bandit
+
+  pip-audit:
+    name: Dependency CVE Audit
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install pip-audit
+        run: pip install pip-audit
+
+      - name: Install project with dev extras
+        run: pip install -e ".[dev]" -q
+
+      - name: pip-audit (from pyproject)
+        run: |
+          # Audits the packages installed by the previous step; a reported vulnerability fails the job.
+          pip-audit
diff --git a/Dangerfile.js b/Dangerfile.js
new file mode 100644
index 0000000..4e5ba89
--- /dev/null
+++ b/Dangerfile.js
@@ -0,0 +1,87 @@
+// Dangerfile.js  — XMem PR Review Bot
+// Docs: https://danger.systems/js/
+
+// ── 1. Warn on big PRs ────────────────────────────────────────────────────────
+// PR size is the sum of added and deleted lines reported for the pull request.
+const bigPRThreshold = 300;
+const { additions, deletions } = danger.github.pr;
+const totalChanges = additions + deletions;
+if (totalChanges > bigPRThreshold) {
+  const sizeNote = `📦 This PR changes **${totalChanges} lines** (additions + deletions). `;
+  const advice = `Large PRs are harder to review thoroughly — consider splitting it.`;
+  warn(sizeNote + advice);
+}
+
+// ── 2. Require tests alongside source changes ─────────────────────────────────
+// Newly added files count as source changes too: a PR that only creates new
+// modules under `src/` needs test coverage just as much as one that edits them.
+const changedOrAddedFiles = [...danger.git.modified_files, ...danger.git.created_files];
+const hasSourceChanges = changedOrAddedFiles.some(f => f.startsWith("src/"));
+const hasTestChanges = changedOrAddedFiles.some(f => f.startsWith("tests/"));
+
+if (hasSourceChanges && !hasTestChanges) {
+  warn(
+    "🧪 Source files in `src/` were added or modified but no test files changed. " +
+    "Please add or update tests to cover your changes."
+  );
+}
+
+// ── 3. Changelog reminder ────────────────────────────────────────────────────
+const hasChangelog = [...danger.git.modified_files, ...danger.git.created_files].includes("CHANGELOG.md"); // creating the file counts as updating it
+if (!hasChangelog) {
+  message("📝 No `CHANGELOG.md` update detected. If this PR introduces a user-visible change, please add an entry.");
+}
+
+// ── 4. Flag changes to sensitive files ───────────────────────────────────────
+// A changed path is flagged when it contains any of these entries as a substring.
+const sensitiveFiles = [
+  "src/api/routes/auth.py",
+  "src/api/routes/admin.py",
+  "src/config/settings.py",
+  "src/config/security.py",
+  "Dockerfile",
+  "docker-compose.yml",
+  "docker-compose.prod.yml",
+];
+const isSensitive = (path) => sensitiveFiles.some((fragment) => path.includes(fragment));
+const touchedSensitive = [...danger.git.modified_files, ...danger.git.created_files].filter(isSensitive);
+
+if (touchedSensitive.length > 0) {
+  const flagged = touchedSensitive.join(", ");
+  fail(
+    `🔐 This PR modifies sensitive files: **${flagged}**. ` +
+    `These require review by a core maintainer (@ishaanxgupta or @ved015) before merging.`
+  );
+}
+
+// ── 5. Dependency changes reminder ───────────────────────────────────────────
+// Only a modified manifest triggers the reminder; a lone `uv.lock` edit does not.
+const depFiles = ["pyproject.toml", "uv.lock", "requirements.txt"];
+const touchedDeps = danger.git.modified_files.filter((path) => depFiles.includes(path));
+const manifestChanged = ["pyproject.toml", "requirements.txt"].some((name) => touchedDeps.includes(name));
+if (manifestChanged) {
+  const reminder = "📦 `pyproject.toml` or `requirements.txt` was modified. " +
+    "Make sure `uv.lock` is updated (`uv lock`) and the security audit passes.";
+  warn(reminder);
+}
+
+// ── 6. No direct commits to main ─────────────────────────────────────────────
+const targetBranch = danger.github.pr.base.ref;
+const targetsMainOrMaster = ["main", "master"].includes(targetBranch);
+if (targetsMainOrMaster) {
+  // This runs inside a PR, so nothing is blocked here; only a squash reminder is posted.
+  const squashReminder = `✅ Targeting \`${targetBranch}\`. Please **squash commits** before merging ` +
+    `to keep the git history clean.`;
+  message(squashReminder);
+}
+
+// ── 7. PR description completeness ───────────────────────────────────────────
+const prBody = danger.github.pr.body || ""; // a missing body is treated as an empty description
+if (prBody.trim().length < 80) {
+  fail([
+    "📋 PR description is too short. Please describe:",
+    "- **What** changed and **Why**",
+    "- Any relevant issue links (`Closes #NNN`)",
+    "- Steps to test manually",
+  ].join("\n"));
+}