diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..c309c2f
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,34 @@
+<!-- Pull Request: contribute a change to XMem. (YAML front matter is only parsed for issue templates, so it is kept as a comment here.) -->
+
+## Summary
+
+
+
+## Motivation / Problem
+
+
+Closes #
+
+## Changes
+
+-
+
+## Testing
+
+- [ ] Unit tests added / updated (`pytest tests/unit`)
+- [ ] Integration tests pass (`pytest tests/integration`)
+- [ ] Tested manually — steps below:
+
+```
+# command to reproduce
+```
+
+## Screenshots / recordings (if UI change)
+
+
+## Checklist
+- [ ] My PR title follows [Conventional Commits](https://www.conventionalcommits.org/) (`feat(scope): description`)
+- [ ] I ran `ruff check .` and `black --check .` locally with no errors
+- [ ] I updated `CHANGELOG.md` if this is a user-visible change
+- [ ] I ran `uv lock` if I modified `pyproject.toml`
+- [ ] Security-sensitive files modified? Pinged `@ishaanxgupta` or `@ved015`
diff --git a/.github/workflows/api-schema-diff.yml b/.github/workflows/api-schema-diff.yml
new file mode 100644
index 0000000..f4475b2
--- /dev/null
+++ b/.github/workflows/api-schema-diff.yml
@@ -0,0 +1,162 @@
+# API diff check — detect breaking changes in OpenAPI schema on PRs.
+# Compares the OpenAPI spec from the PR branch against the PR's base branch and
+# posts a diff comment so reviewers can see exactly what API surface changed.
+
+name: API Schema Diff
+
+on:
+  pull_request:
+    branches: [develop, main]
+    paths:
+      - "src/api/**"
+      - "src/schemas/**"
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: api-diff-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  diff:
+    name: Detect API breaking changes
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      # Full history so the base branch can be checked out later in this job.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          pip install -e ".[dev]"
+
+      # Builds the app with placeholder settings and dumps its OpenAPI document.
+      # Best effort: if the app cannot be created, an empty spec ({}) is written instead.
+      - name: Generate OpenAPI spec (PR branch)
+        run: |
+          python -c "
+          import json, os
+          os.environ.setdefault('API_KEYS', '[\"test\"]')
+          os.environ.setdefault('JWT_SECRET_KEY', 'test')
+          os.environ.setdefault('PINECONE_API_KEY', 'test')
+          os.environ.setdefault('PINECONE_INDEX_NAME', 'test')
+          os.environ.setdefault('NEO4J_PASSWORD', 'test')
+          os.environ.setdefault('GEMINI_API_KEY', 'test')
+          os.environ.setdefault('MONGODB_URI', 'mongodb://127.0.0.1:1')
+          os.environ.setdefault('ENABLE_ANALYTICS', 'false')
+          os.environ.setdefault('ENABLE_PROMETHEUS', 'false')
+          from src.api.app import create_app
+          app = create_app()
+          spec = app.openapi()
+          with open('openapi-pr.json', 'w') as f:
+              json.dump(spec, f, indent=2)
+          " || echo '{}' > openapi-pr.json
+
+      # Same generation against the PR's base branch; the working tree is switched back afterwards.
+      - name: Generate OpenAPI spec (base branch)
+        env:
+          # Passed through the environment so the ref is never spliced into the script text.
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          git stash || true
+          git checkout "$BASE_REF"
+          python -c "
+          import json, os
+          os.environ.setdefault('API_KEYS', '[\"test\"]')
+          os.environ.setdefault('JWT_SECRET_KEY', 'test')
+          os.environ.setdefault('PINECONE_API_KEY', 'test')
+          os.environ.setdefault('PINECONE_INDEX_NAME', 'test')
+          os.environ.setdefault('NEO4J_PASSWORD', 'test')
+          os.environ.setdefault('GEMINI_API_KEY', 'test')
+          os.environ.setdefault('MONGODB_URI', 'mongodb://127.0.0.1:1')
+          os.environ.setdefault('ENABLE_ANALYTICS', 'false')
+          os.environ.setdefault('ENABLE_PROMETHEUS', 'false')
+          from src.api.app import create_app
+          app = create_app()
+          spec = app.openapi()
+          with open('openapi-base.json', 'w') as f:
+              json.dump(spec, f, indent=2)
+          " || echo '{}' > openapi-base.json
+          git checkout -
+
+      - name: Diff OpenAPI specs
+        id: diff
+        run: |
+          pip install deepdiff
+          # Quoted heredoc: the shell expands nothing, so the Python source can use both
+          # quote styles (needed to match DeepDiff's root['paths'][...] path notation).
+          python - <<'PY' > api-diff-report.txt 2>&1 || true
+          import json, sys
+          from deepdiff import DeepDiff
+
+          with open('openapi-base.json') as f:
+              base = json.load(f)
+          with open('openapi-pr.json') as f:
+              pr = json.load(f)
+
+          diff = DeepDiff(base, pr, ignore_order=True)
+
+          if not diff:
+              print('NO_CHANGES')
+              sys.exit(0)
+
+          # DeepDiff reports locations as root['paths']['/v1/...'], not as
+          # slash-separated pointers, so endpoint changes are recognised by prefix.
+          PATHS_PREFIX = "root['paths']"
+
+          # Detect breaking changes
+          breaking = []
+          added = []
+          changed = []
+
+          removed = diff.get('dictionary_item_removed', [])
+          for item in removed:
+              path = str(item)
+              if path.startswith(PATHS_PREFIX):
+                  breaking.append(f'🔴 REMOVED: {path}')
+
+          new_items = diff.get('dictionary_item_added', [])
+          for item in new_items:
+              path = str(item)
+              if path.startswith(PATHS_PREFIX):
+                  added.append(f'🟢 ADDED: {path}')
+
+          values_changed = diff.get('values_changed', {})
+          for path, change in values_changed.items():
+              changed.append(f'🟡 CHANGED: {path}')
+
+          print('---REPORT---')
+          if breaking:
+              print('### ⚠️ Breaking Changes')
+              for b in breaking:
+                  print(f'- {b}')
+          if added:
+              print('### ✅ New Endpoints')
+              for a in added:
+                  print(f'- {a}')
+          if changed:
+              print('### 🔄 Modified')
+              # Cap the list so a large refactor does not produce an unreadable comment.
+              for c in changed[:20]:
+                  print(f'- {c}')
+          PY
+
+          cat api-diff-report.txt
+
+      - name: Post diff to PR
+        if: always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            let report = '';
+            try {
+              report = fs.readFileSync('api-diff-report.txt', 'utf8');
+            } catch { report = 'Could not generate API diff.'; }
+
+            // Drop the internal marker; if nothing reportable is left, stay silent.
+            const findings = report.replace('---REPORT---', '').trim();
+            if (report.includes('NO_CHANGES') || findings === '') {
+              return; // No API changes, skip comment
+            }
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: `## 🔍 API Schema Diff\n\n${findings}\n\n---\n_Auto-generated by API Schema Diff workflow_`,
+            });
diff --git a/.github/workflows/danger.yml b/.github/workflows/danger.yml
new file mode 100644
index 0000000..e5b68a4
--- /dev/null
+++ b/.github/workflows/danger.yml
@@ -0,0 +1,40 @@
+name: Danger PR Review Bot
+
+# Danger runs on the PR and posts a review comment with potential issues.
+# It does NOT block the PR — it's purely advisory for the reviewer.
+
+on:
+  pull_request:
+    branches: [main, master, develop]
+
+permissions:
+  contents: read # an explicit permissions block drops every unlisted scope; checkout needs this one
+  pull-requests: write
+  statuses: write
+
+concurrency:
+  group: danger-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  danger:
+    name: Danger Review
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Install Danger
+        run: npm install --save-dev danger
+
+      - name: Run Danger
+        run: npx danger ci
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/deploy-aws.yml b/.github/workflows/deploy-aws.yml
index da48ef8..7abb509 100644
--- a/.github/workflows/deploy-aws.yml
+++ b/.github/workflows/deploy-aws.yml
@@ -1,28 +1,18 @@
-# Deploy XMem to an existing AWS EC2 instance when changes land on main.
+# Deploy XMem to the PRODUCTION AWS EC2 instance when changes land on main.
 #
-# A merged PR produces a push to main — that is what triggers this workflow.
-# The domain / DNS record does not move: it still points at the same instance;
-# this job only updates the app on that machine (git pull + Docker restart, etc.).
+# ⚠️ IMPORTANT: This should only fire from the promote-to-production workflow
+# merging develop → main. Direct pushes to main should be blocked by branch
+# protection rules (Settings → Branches → main → Require PR).
 #
 # Required GitHub repository secrets:
-#   EC2_HOST        Public DNS, Elastic IP, or hostname (same host you use with
-#                   `ssh -i your-key.pem ...` today).
-#   EC2_USER        SSH user (e.g. ubuntu, ec2-user, admin).
-#   EC2_SSH_KEY     The SAME private key as your AWS `.pem` file: open the PEM in a
-#                   text editor and paste the entire contents into this secret —
-#                   including the `-----BEGIN ... PRIVATE KEY-----` and `-----END...`
-#                   lines. This is equivalent to `ssh -i key.pem` from your laptop.
-#                   If the key has a passphrase, also add optional secret
-#                   EC2_SSH_KEY_PASSPHRASE (see `passphrase` below).
+#   EC2_HOST        Public DNS, Elastic IP, or hostname
+#   EC2_USER        SSH user (e.g. ubuntu, ec2-user, admin)
+#   EC2_SSH_KEY     Private key (same format as your .pem)
 #
-# Required repository Variable (Settings → Secrets and variables → Actions → Variables):
-#   EC2_DEPLOY_PATH  Absolute path on the server where this repo is cloned
-#                    (e.g. /home/ubuntu/xmem).
-#
-# The instance must be able to `git pull` from GitHub (deploy key or cached credentials)
-# if the repo is private.
+# Required repository Variable:
+#   EC2_DEPLOY_PATH  Absolute path on the server (e.g. /home/ubuntu/xmem)
 
-name: Deploy to AWS EC2
+name: Deploy to Production
 
 on:
   push:
@@ -36,11 +26,21 @@ concurrency:
 jobs:
   deploy:
     runs-on: ubuntu-latest
-    environment: EC2_HOST
+    environment:
+      name: production
+      url: ${{ vars.PRODUCTION_URL }}
     permissions:
       contents: read
+      deployments: write
 
     steps:
+      - name: Create deployment
+        uses: chrnorm/deployment-action@v2
+        id: deployment
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: production
+
       - name: Deploy over SSH
         uses: appleboy/ssh-action@v1.2.2
         with:
@@ -52,7 +52,42 @@ jobs:
           script: |
             set -euo pipefail
             cd "${{ secrets.EC2_DEPLOY_PATH }}"
+
+            echo "── Pulling latest main ──"
             git fetch origin main
             git checkout main
             git pull origin main
-            sudo systemctl restart xmem
\ No newline at end of file
+
+            echo "── Restarting XMem service ──"
+            sudo systemctl restart xmem
+
+            echo "── Waiting for health endpoint ──"
+            # Up to 30 probes, 10s apart; `|| true` keeps `set -e` from aborting while the service is still starting.
+            for i in $(seq 1 30); do
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/health || true)
+              if [ "$HTTP_CODE" = "200" ]; then
+                echo "Health check passed (attempt $i)"
+                exit 0
+              fi
+              echo "Health check attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+              sleep 10
+            done
+            echo "FATAL: Health check never returned 200 after 300s"
+            exit 1
+
+      - name: Deployment succeeded
+        if: success()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: success
+          environment-url: ${{ vars.PRODUCTION_URL }}
+
+      - name: Deployment failed
+        if: failure() && steps.deployment.outputs.deployment_id != ''
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: failure
\ No newline at end of file
diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml
new file mode 100644
index 0000000..f2019ad
--- /dev/null
+++ b/.github/workflows/deploy-staging.yml
@@ -0,0 +1,252 @@
+# ══════════════════════════════════════════════════════════════════════════════
+# Deploy XMem to the STAGING (UAT / canary) EC2 instance.
+#
+# Triggered when:
+#   1. A PR is merged into `develop` (automatic — the normal workflow)
+#   2. A manual run via workflow_dispatch (for ad-hoc deploys)
+#
+# After a successful deploy the "Smoke test staging" job of this workflow runs
+# automatically to verify the app is alive and API contracts work.
+#
+#   ┌──────────────┐   merge    ┌──────────┐   auto-deploy    ┌──────────┐
+#   │ feature/fix  │ ─────────► │ develop  │ ───────────────► │ STAGING  │
+#   └──────────────┘            └──────────┘                  └──────────┘
+#                                                                   │
+#                                                            smoke tests run
+#                                                                   │
+#                                                          ┌────────▼────────┐
+#                                                          │  You review on  │
+#                                                          │  staging URL    │
+#                                                          └────────┬────────┘
+#                                                                   │
+#                                          promote-to-production    │ (manual)
+#                                                                   ▼
+#                                                          ┌──────────────┐
+#                                                          │  PRODUCTION  │
+#                                                          │    (main)    │
+#                                                          └──────────────┘
+#
+# ── Required secrets (Settings → Secrets → Actions) ──────────────────────────
+#   STAGING_EC2_HOST                  Public DNS / Elastic IP of the staging instance
+#   STAGING_EC2_USER                  SSH user (e.g. ubuntu)
+#   STAGING_EC2_SSH_KEY               Private key (same format as your .pem)
+#   STAGING_EC2_SSH_KEY_PASSPHRASE    Passphrase for the key (read by the deploy step below)
+#   STAGING_EC2_DEPLOY_PATH           Absolute path on the staging server (/home/ubuntu/xmem)
+#
+# Staging .env should have:
+#   - A separate Pinecone index (e.g. xmem-staging)
+#   - A separate MongoDB database (e.g. xmem_staging)
+#   - ENABLE_ANALYTICS=false
+#   - XMEM_ENV=staging
+# ══════════════════════════════════════════════════════════════════════════════
+
+name: Deploy to Staging
+
+on:
+  push:
+    branches: [develop]
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: "Git ref to deploy (branch, tag, or SHA)"
+        required: false
+        default: develop
+
+concurrency:
+  group: deploy-staging
+  cancel-in-progress: true # latest push wins — old staging deploy is stale
+
+permissions:
+  contents: read
+  deployments: write
+  statuses: write
+  pull-requests: write # the smoke-test job comments on the merged PR
+
+jobs:
+  # ── 1. Build & validate Docker image ────────────────────────────────────────
+  validate-build:
+    name: Validate Docker build
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.ref || 'develop' }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image (no push — validation only)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: false
+          tags: xmem:staging-${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  # ── 2. Deploy to staging EC2 ────────────────────────────────────────────────
+  deploy:
+    name: Deploy to staging EC2
+    runs-on: ubuntu-latest
+    needs: validate-build
+    timeout-minutes: 20
+    environment:
+      name: staging
+      url: ${{ vars.STAGING_URL }} # e.g. https://staging.xmem.bot
+
+    steps:
+      - name: Create GitHub deployment
+        uses: chrnorm/deployment-action@v2
+        id: deployment
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: staging
+
+      # NOTE(review): the `ref` dispatch input is honoured by validate-build only; this
+      # step always deploys `develop`. Confirm whether ad-hoc refs should be deployable.
+      - name: Deploy over SSH
+        uses: appleboy/ssh-action@v1.2.2
+        with:
+          host: ${{ secrets.STAGING_EC2_HOST }}
+          username: ${{ secrets.STAGING_EC2_USER }}
+          key: ${{ secrets.STAGING_EC2_SSH_KEY }}
+          passphrase: ${{ secrets.STAGING_EC2_SSH_KEY_PASSPHRASE }}
+          command_timeout: 20m
+          script: |
+            set -euo pipefail
+            cd "${{ secrets.STAGING_EC2_DEPLOY_PATH }}"
+
+            echo "── Pulling latest develop ──"
+            git fetch origin develop
+            git checkout develop
+            git pull origin develop
+
+            echo "── Restarting XMem service ──"
+            sudo systemctl restart xmem-staging
+
+            echo "── Waiting for health endpoint ──"
+            # Up to 30 probes, 10s apart; `|| true` keeps `set -e` from aborting while the service is still starting.
+            for i in $(seq 1 30); do
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/health || true)
+              if [ "$HTTP_CODE" = "200" ]; then
+                echo "Health check passed (attempt $i)"
+                exit 0
+              fi
+              echo "Health check attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+              sleep 10
+            done
+            echo "FATAL: Health check never returned 200 after 300s"
+            exit 1
+
+      - name: Update deployment status (success)
+        if: success()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: success
+          environment-url: ${{ vars.STAGING_URL }}
+
+      - name: Update deployment status (failure)
+        if: failure()
+        uses: chrnorm/deployment-status@v2
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          deployment-id: ${{ steps.deployment.outputs.deployment_id }}
+          state: failure
+
+  # ── 3. Smoke tests against live staging ─────────────────────────────────────
+  smoke-test:
+    name: Smoke test staging
+    runs-on: ubuntu-latest
+    needs: deploy
+    timeout-minutes: 10
+    environment:
+      name: staging
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Wait for staging to stabilize
+        run: sleep 15
+
+      - name: Health check
+        run: |
+          STATUS=$(curl -sf "${{ vars.STAGING_URL }}/health" | jq -r '.data.status')
+          echo "Staging health status: $STATUS"
+          if [ "$STATUS" != "ready" ] && [ "$STATUS" != "loading" ]; then
+            echo "FAIL: staging is not healthy"
+            exit 1
+          fi
+
+      - name: API contract — /health response shape
+        run: |
+          RESPONSE=$(curl -sf "${{ vars.STAGING_URL }}/health")
+          echo "$RESPONSE" | jq -e '.status'
+          # Shape check only: `jq -e` on the bare value would fail whenever the field is
+          # legitimately `false`, which the "loading" state accepted above may produce.
+          echo "$RESPONSE" | jq -e '.data | has("pipelines_ready")'
+
+      - name: API contract — /docs returns OpenAPI
+        run: |
+          curl -sf "${{ vars.STAGING_URL }}/docs" | grep -q "swagger-ui"
+
+      - name: API contract — auth endpoints exist
+        run: |
+          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${{ vars.STAGING_URL }}/auth/google")
+          echo "Auth endpoint returned: $HTTP_CODE"
+          # 307 (redirect) or 405 (method not allowed) are both valid — means route exists
+          if [ "$HTTP_CODE" = "404" ]; then
+            echo "FAIL: auth route missing"
+            exit 1
+          fi
+
+      - name: API contract — memory ingest rejects empty body
+        run: |
+          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+            -X POST "${{ vars.STAGING_URL }}/v1/memory/ingest" \
+            -H "Content-Type: application/json" \
+            -d '{}')
+          echo "Memory ingest with empty body: $HTTP_CODE"
+          # 422 = validation error (expected), 401 = auth required (also valid)
+          if [ "$HTTP_CODE" = "404" ] || [ "$HTTP_CODE" = "500" ]; then
+            echo "FAIL: unexpected response from memory ingest"
+            exit 1
+          fi
+
+      - name: Post result to PR
+        if: always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const status = '${{ job.status }}' === 'success' ? '✅' : '❌';
+            const url = '${{ vars.STAGING_URL }}';
+            const sha = context.sha.substring(0, 7);
+
+            // Ask GitHub which PR produced this exact commit, instead of guessing from
+            // the most recently updated PRs (which matched any merged PR).
+            const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              commit_sha: context.sha,
+            });
+
+            const matchedPr = prs.find(pr => pr.base.ref === 'develop' && pr.merged_at);
+
+            if (matchedPr) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: matchedPr.number,
+                body: [
+                  `## ${status} Staging Deployment Report`,
+                  '',
+                  `| Item | Value |`,
+                  `|------|-------|`,
+                  `| **Commit** | \`${sha}\` |`,
+                  `| **Environment** | [Staging](${url}) |`,
+                  `| **Health** | ${url}/health |`,
+                  `| **Smoke tests** | ${{ job.status }} |`,
+                  '',
+                  status === '✅'
+                    ? '🟢 Ready for review. Test at the staging URL above, then [promote to production](../actions/workflows/promote-to-production.yml).'
+                    : '🔴 Smoke tests failed. Check the [workflow run](../actions/runs/${{ github.run_id }}) for details.',
+                ].join('\n'),
+              });
+            }
diff --git a/.github/workflows/docker-build-check.yml b/.github/workflows/docker-build-check.yml
new file mode 100644
index 0000000..b3253ea
--- /dev/null
+++ b/.github/workflows/docker-build-check.yml
@@ -0,0 +1,58 @@
+# Validate that the Docker image builds successfully on every PR.
+# This catches broken Dockerfiles, missing files, and build-time errors
+# BEFORE code reaches staging.
+
+name: Docker Build Check
+
+on:
+  pull_request:
+    branches: [develop, main]
+    paths:
+      - "Dockerfile"
+      - "docker/**"
+      - "pyproject.toml"
+      - "setup.py"
+      - "src/**"
+      - "server.py"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: docker-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Docker build validation
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build (no push)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: false
+          load: true # export into the local Docker engine so the size check can reuse this build
+          tags: xmem:pr-${{ github.event.pull_request.number }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Image size check
+        env:
+          IMAGE: xmem:pr-${{ github.event.pull_request.number }}
+        run: |
+          # Inspect the image loaded by the previous step instead of building a second time.
+          SIZE=$(docker image inspect "$IMAGE" --format='{{.Size}}')
+          SIZE_MB=$((SIZE / 1024 / 1024))
+          echo "Image size: ${SIZE_MB}MB"
+
+          # Advisory threshold: exceeding it only prints a warning, the job still passes.
+          MAX_SIZE_MB=2000
+          if [ "$SIZE_MB" -gt "$MAX_SIZE_MB" ]; then
+            echo "⚠️ WARNING: Docker image is ${SIZE_MB}MB (limit: ${MAX_SIZE_MB}MB)"
+            echo "Consider optimizing the Dockerfile (multi-stage builds, .dockerignore, etc.)"
+          fi
diff --git a/.github/workflows/promote-to-production.yml b/.github/workflows/promote-to-production.yml
new file mode 100644
index 0000000..81e9e05
--- /dev/null
+++ b/.github/workflows/promote-to-production.yml
@@ -0,0 +1,200 @@
+# ══════════════════════════════════════════════════════════════════════════════
+# Promote staging → production
+#
+# This is the MANUAL gate between staging and production.
+# A maintainer clicks "Run workflow" only after reviewing on staging.
+#
+# What happens:
+#   1. Fast-forward merges `develop` into `main`
+#   2. This triggers `deploy-aws.yml` (existing workflow) → production EC2
+#   3. Runs the same smoke tests against production
+#   4. If production smoke tests fail → auto-rollback (reverts the merge)
+#
+# ── Required ──────────────────────────────────────────────────────────────────
+#   - The `production` environment in GitHub must have:
+#     Settings → Environments → production → Required reviewers: @ishaanxgupta, @ved015
+#   - This adds a manual approval popup even if someone accidentally triggers it.
+#   - Secret PROMOTE_TOKEN: a PAT or GitHub App token allowed to push to `main`.
+#     Pushes made with the default GITHUB_TOKEN do not start other workflows, so
+#     without it step 2 never fires and production keeps running the old code.
+# ══════════════════════════════════════════════════════════════════════════════
+
+name: Promote to Production
+
+on:
+  workflow_dispatch:
+    inputs:
+      skip_staging_check:
+        description: "Skip staging smoke test verification (emergency deploys only)"
+        type: boolean
+        default: false
+      confirm_production:
+        description: "Type 'DEPLOY' to confirm production deployment"
+        required: true
+
+concurrency:
+  group: deploy-production
+  cancel-in-progress: false # never cancel a production deploy mid-flight
+
+permissions:
+  contents: write # needed to push the merge to main
+  deployments: write
+  statuses: write
+
+jobs:
+  # ── 0. Safety checks ────────────────────────────────────────────────────────
+  gate:
+    name: Pre-flight checks
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+
+    steps:
+      - name: Confirm deploy keyword
+        env:
+          # Free-text input goes through the environment, never into the script text,
+          # so quotes or `$(...)` typed into the form cannot run as shell code.
+          CONFIRM: ${{ github.event.inputs.confirm_production }}
+        run: |
+          if [ "$CONFIRM" != "DEPLOY" ]; then
+            echo "❌ You must type 'DEPLOY' to confirm. Got: '$CONFIRM'"
+            exit 1
+          fi
+
+      - name: Verify staging is healthy
+        if: ${{ github.event.inputs.skip_staging_check != 'true' }}
+        run: |
+          # Without pipefail a failed curl is hidden by jq's exit status, so the
+          # "unreachable" fallback is derived from the empty result instead.
+          STATUS=$(curl -sf "${{ vars.STAGING_URL }}/health" | jq -r '.data.status // empty' || true)
+          STATUS="${STATUS:-unreachable}"
+          echo "Staging status: $STATUS"
+          if [ "$STATUS" != "ready" ]; then
+            echo "❌ Staging is not healthy ($STATUS). Fix staging first, or use skip_staging_check for emergencies."
+            exit 1
+          fi
+
+  # ── 1. Merge develop → main ─────────────────────────────────────────────────
+  merge:
+    name: Merge develop → main
+    runs-on: ubuntu-latest
+    needs: gate
+    timeout-minutes: 10
+    environment:
+      name: production # triggers manual approval if configured
+
+    outputs:
+      merge_sha: ${{ steps.merge.outputs.sha }}
+      previous_sha: ${{ steps.before.outputs.sha }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          # Falls back to GITHUB_TOKEN so existing setups keep working, but that push
+          # will not trigger deploy-aws.yml; configure PROMOTE_TOKEN for a real deploy.
+          token: ${{ secrets.PROMOTE_TOKEN || secrets.GITHUB_TOKEN }}
+
+      # Remembered so the rollback job knows where `main` pointed before the promotion.
+      - name: Record pre-merge SHA
+        id: before
+        run: |
+          git checkout main
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Fast-forward merge develop → main
+        id: merge
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          git checkout main
+          # --ff-only refuses to create a merge commit: main must be an ancestor of develop.
+          git merge origin/develop --ff-only
+
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Push to main
+        run: git push origin main
+
+  # ── 2. Wait for production deploy ───────────────────────────────────────────
+  # The existing deploy-aws.yml fires on push to main.
+  # We just need to wait for the service to come up.
+  verify-production:
+    name: Verify production deployment
+    runs-on: ubuntu-latest
+    needs: merge
+    timeout-minutes: 15
+
+    steps:
+      # NOTE(review): a fixed sleep followed by /health cannot tell the new release from
+      # the previous one still serving; consider asserting a version/commit field as well.
+      - name: Wait for deploy-aws to finish
+        run: |
+          echo "Waiting 60s for deploy-aws.yml to pick up the push..."
+          sleep 60
+
+      - name: Production health check (with retry)
+        id: health
+        run: |
+          # Prefer the configured URL; otherwise fall back to the EC2 host on port 8000.
+          PROD_URL="${{ vars.PRODUCTION_URL }}"
+          if [ -z "$PROD_URL" ]; then
+            PROD_URL="${{ secrets.EC2_HOST }}"
+            PROD_URL="http://$PROD_URL:8000"
+          fi
+
+          for i in $(seq 1 30); do
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$PROD_URL/health" || true)
+            if [ "$HTTP_CODE" = "200" ]; then
+              echo "✅ Production health check passed (attempt $i)"
+              echo "url=$PROD_URL" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+            echo "Attempt $i: HTTP $HTTP_CODE — retrying in 10s"
+            sleep 10
+          done
+          echo "❌ Production health check failed after 5 minutes"
+          exit 1
+
+      - name: Production smoke — API docs
+        run: |
+          curl -sf "${{ steps.health.outputs.url }}/docs" | grep -q "swagger-ui"
+
+      - name: Production smoke — health response shape
+        run: |
+          RESPONSE=$(curl -sf "${{ steps.health.outputs.url }}/health")
+          # Shape check only: `jq -e` on the bare value would fail when the field is `false`.
+          echo "$RESPONSE" | jq -e '.data | has("pipelines_ready")'
+
+  # ── 3. Auto-rollback on failure ─────────────────────────────────────────────
+  rollback:
+    name: Rollback production
+    runs-on: ubuntu-latest
+    needs: [merge, verify-production]
+    if: failure() && needs.merge.result == 'success'
+    timeout-minutes: 10
+    # Job-level block replaces the workflow-level one: pushing needs contents,
+    # and opening the incident issue needs issues.
+    permissions:
+      contents: write
+      issues: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          # Same token rule as the merge job: only a non-default token makes the push
+          # below re-run deploy-aws.yml, which is what actually restores production.
+          token: ${{ secrets.PROMOTE_TOKEN || secrets.GITHUB_TOKEN }}
+
+      - name: Revert main to pre-merge state
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          PREVIOUS_SHA="${{ needs.merge.outputs.previous_sha }}"
+          echo "⚠️ Rolling back main to $PREVIOUS_SHA"
+
+          git checkout main
+          git reset --hard "$PREVIOUS_SHA"
+          # --force-with-lease aborts if main moved since this job fetched it.
+          git push --force-with-lease origin main
+
+      - name: Notify rollback
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // context.sha is the commit the workflow was dispatched on, not the
+            // commit that was promoted, so report the merge job's output instead.
+            const promotedSha = '${{ needs.merge.outputs.merge_sha }}';
+            await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title: '🚨 PRODUCTION ROLLBACK — promote-to-production failed',
+              body: [
+                '## Automatic Rollback Executed',
+                '',
+                `Production deployment of \`${promotedSha.substring(0, 7)}\` failed smoke tests.`,
+                '',
+                `**Main was rolled back to:** \`${{ needs.merge.outputs.previous_sha }}\``,
+                '',
+                `**Action required:**`,
+                '1. Check the [failed workflow run](../actions/runs/${{ github.run_id }})',
+                '2. Fix the issue on `develop`',
+                '3. Re-run promote-to-production',
+                '',
+                'cc @ishaanxgupta @ved015',
+              ].join('\n'),
+              labels: ['bug', 'status/urgent'],
+            });
diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
new file mode 100644
index 0000000..ccfd1f5
--- /dev/null
+++ b/.github/workflows/security-scan.yml
@@ -0,0 +1,71 @@
+name: Security Scan
+
+on:
+  pull_request:
+    branches: [main, master, develop]
+  schedule:
+    - cron: "0 2 * * 1" # also run weekly on Mondays at 02:00 UTC
+
+permissions:
+  contents: read
+  security-events: write # needed to upload SARIF to GitHub Security tab
+
+concurrency:
+  group: security-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  bandit:
+    name: Bandit SAST
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      # The `sarif` extra installs what the `-f sarif` formatter used below depends on.
+      - name: Install bandit
+        run: pip install "bandit[toml,sarif]"
+
+      # continue-on-error: findings are reported via the uploaded SARIF, not by failing the job.
+      - name: Run Bandit
+        run: |
+          bandit -r src/ \
+            -ll \
+            --exclude src/tests \
+            -f sarif \
+            -o bandit-results.sarif
+        continue-on-error: true
+
+      - name: Upload Bandit SARIF
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: bandit-results.sarif
+          category: bandit
+
+  pip-audit:
+    name: Dependency CVE Audit
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install pip-audit
+        run: pip install pip-audit
+
+      # Single audit of the environment installed from pyproject.toml; a failure here fails the job.
+      - name: pip-audit (from pyproject)
+        run: |
+          pip install -e ".[dev]" -q
+          pip-audit
diff --git a/Dangerfile.js b/Dangerfile.js
new file mode 100644
index 0000000..4e5ba89
--- /dev/null
+++ b/Dangerfile.js
@@ -0,0 +1,91 @@
+// Dangerfile.js — XMem PR Review Bot
+// Docs: https://danger.systems/js/
+//
+// Every finding is reported with warn()/message() rather than fail(): the
+// Danger workflow documents this bot as advisory, and a fail() on a sensitive
+// file could never be cleared by the author short of reverting the change.
+
+// Files this PR adds or edits. Newly created files count everywhere below, so
+// a PR that only adds files is held to the same checks as one that edits them.
+const changedFiles = [...danger.git.modified_files, ...danger.git.created_files];
+
+// ── 1. Warn on big PRs ──────────────────────────────────────────────────────
+const bigPRThreshold = 300;
+const totalChanges = danger.github.pr.additions + danger.github.pr.deletions;
+
+if (totalChanges > bigPRThreshold) {
+  warn(
+    `📦 This PR changes **${totalChanges} lines** (additions + deletions). ` +
+      `Large PRs are harder to review thoroughly — consider splitting it.`
+  );
+}
+
+// ── 2. Require tests alongside source changes ───────────────────────────────
+const hasSourceChanges = changedFiles.some((f) => f.startsWith("src/"));
+const hasTestChanges = changedFiles.some((f) => f.startsWith("tests/"));
+
+if (hasSourceChanges && !hasTestChanges) {
+  warn(
+    "🧪 Source files in `src/` were modified but no test files changed. " +
+      "Please add or update tests to cover your changes."
+  );
+}
+
+// ── 3. Changelog reminder ───────────────────────────────────────────────────
+const hasChangelog = changedFiles.includes("CHANGELOG.md");
+if (!hasChangelog) {
+  message("📝 No `CHANGELOG.md` update detected. If this PR introduces a user-visible change, please add an entry.");
+}
+
+// ── 4. Flag changes to sensitive files ──────────────────────────────────────
+const sensitiveFiles = [
+  "src/api/routes/auth.py",
+  "src/api/routes/admin.py",
+  "src/config/settings.py",
+  "src/config/security.py",
+  "Dockerfile",
+  "docker-compose.yml",
+  "docker-compose.prod.yml",
+];
+
+// NOTE(review): substring match, so e.g. `docs/Dockerfile.md` is flagged too — confirm that is intended.
+const touchedSensitive = changedFiles.filter((f) => sensitiveFiles.some((s) => f.includes(s)));
+
+if (touchedSensitive.length > 0) {
+  warn(
+    `🔐 This PR modifies sensitive files: **${touchedSensitive.join(", ")}**. ` +
+      `These require review by a core maintainer (@ishaanxgupta or @ved015) before merging.`
+  );
+}
+
+// ── 5. Dependency changes reminder ──────────────────────────────────────────
+const depFiles = ["pyproject.toml", "uv.lock", "requirements.txt"];
+const touchedDeps = changedFiles.filter((f) => depFiles.includes(f));
+
+if (touchedDeps.includes("pyproject.toml") || touchedDeps.includes("requirements.txt")) {
+  warn(
+    "📦 `pyproject.toml` or `requirements.txt` was modified. " +
+      "Make sure `uv.lock` is updated (`uv lock`) and the security audit passes."
+  );
+}
+
+// ── 6. No direct commits to main ────────────────────────────────────────────
+const targetBranch = danger.github.pr.base.ref;
+if (targetBranch === "main" || targetBranch === "master") {
+  // We're already in a PR — just remind about squash
+  message(
+    `✅ Targeting \`${targetBranch}\`. Please **squash commits** before merging ` +
+      `to keep the git history clean.`
+  );
+}
+
+// ── 7. PR description completeness ──────────────────────────────────────────
+const prBody = danger.github.pr.body ?? "";
+if (prBody.trim().length < 80) {
+  warn(
+    "📋 PR description is too short. Please describe:\n" +
+      "- **What** changed and **Why**\n" +
+      "- Any relevant issue links (`Closes #NNN`)\n" +
+      "- Steps to test manually"
+  );
+}