diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000..6f8405e
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,49 @@
+name: Create and publish a Docker image
+
+on:
+  push:
+    branches:
+      - main
+  # Allow building/pushing on demand (e.g. before the first deploy).
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=sha,prefix=sha-
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          platforms: 'linux/amd64'
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-to: type=gha,mode=max
+          cache-from: type=gha
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7fa40cf..ec47fe5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -32,3 +32,19 @@ jobs:
 
       - name: Run tests
         run: make test
+
+  helm:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v6.0.3
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+
+      - name: Lint Helm chart
+        run: helm lint deploy/helm
+
+      - name: Render production values
+        run: helm template serge deploy/helm -n serge -f deploy/helm/env/prod.yaml
diff --git a/.gitignore b/.gitignore
index f92a496..6c2706d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,5 @@ dist/
 *.db
 .serena/
 .jekyll-cache/
+# Filled-in copy of deploy/helm/serge-secrets.example.yaml — never commit real secrets.
+deploy/helm/serge-secrets.yaml
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..86bfaad
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+# Production image for the serge web app (reviewbot-web). Mirrors the EC2
+# host: python3.11 + bubblewrap (so HELPER_SANDBOX can stay on), the
+# package installed into a venv with the [web] extra (FastAPI/uvicorn),
+# running uvicorn on $PORT (default 8080) as an unprivileged user. The
+# embedded SQLite job store persists on a mounted volume (see deploy/helm/).
+#
+# The sandbox-verification image used for local bwrap testing lives at
+# docker/Dockerfile and is unrelated to this one.
+FROM python:3.11-slim
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends bubblewrap ca-certificates git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Unprivileged service user (mirrors ec2-user); uid 1000 is pinned because the Helm chart sets runAsUser/fsGroup to 1000.
+RUN useradd --create-home --shell /bin/bash --uid 1000 app
+
+WORKDIR /opt/app
+COPY . /opt/app
+RUN python -m venv /opt/app/.venv \
+    && /opt/app/.venv/bin/pip install --upgrade pip \
+    && /opt/app/.venv/bin/pip install -e '.[web]'
+
+ENV PATH="/opt/app/.venv/bin:${PATH}"
+ENV PORT=8080
+EXPOSE 8080
+USER app
+CMD ["reviewbot-web"]
diff --git a/deploy/README.md b/deploy/README.md
new file mode 100644
index 0000000..329eacc
--- /dev/null
+++ b/deploy/README.md
@@ -0,0 +1,83 @@
+# Deploying Serge
+
+This directory packages Serge's web app (`reviewbot-web`) for Kubernetes.
+The Helm chart is intentionally self-contained and values-driven so a team can
+deploy it into its own cluster without changing application code.
+
+## Contents
+
+- `helm/` contains the Helm chart for the web app: Deployment, Service,
+  ConfigMap, optional Ingress, optional ServiceAccount, and a PersistentVolumeClaim.
+- `helm/env/prod.yaml` contains the production values used for
+  `serge.huggingface.tech` on the open-source EKS cluster.
+- `helm/serge-secrets.example.yaml` is a template for the sensitive runtime env.
+  Copy it to `helm/serge-secrets.yaml`, fill it locally, and never commit it.
+- `scripts/deploy.sh` checks the current Kubernetes context, creates the namespace
+  when needed, optionally applies a local Secret file, and runs Helm.
+- `scripts/logs.sh` finds the current running Serge pod and prints recent logs.
+
+## Chart Behavior
+
+Serge uses embedded SQLite for review/task history. The chart therefore runs a
+single replica with a `Recreate` rollout strategy and mounts a PVC at
+`persistence.mountPath` (`/var/lib/reviewbot` by default). `WEB_STORE_PATH` is
+set to `<mountPath>/jobs.db`, so the database survives pod restarts.
+
+The container runs as a non-root user, drops Linux capabilities, uses
+`RuntimeDefault` seccomp, and sets `fsGroup` so the app user can write the
+volume. Sensitive values are loaded from a pre-created Secret via
+`existingSecret`; non-secret runtime config lives in `envVars`.
+
+## Deploy
+
+Create or update the Secret in the target namespace:
+
+```bash
+cp deploy/helm/serge-secrets.example.yaml deploy/helm/serge-secrets.yaml
+$EDITOR deploy/helm/serge-secrets.yaml
+deploy/scripts/deploy.sh -n serge --secret-file deploy/helm/serge-secrets.yaml
+```
+
+Deploy without applying a Secret file, assuming `serge-secrets` already exists:
+
+```bash
+deploy/scripts/deploy.sh -n serge -f deploy/helm/env/prod.yaml
+```
+
+Use `--context` when you want the script to refuse any other kube context:
+
+```bash
+deploy/scripts/deploy.sh \
+  --context infra:opensource-aws-use1-prod-54 \
+  -n serge \
+  -f deploy/helm/env/prod.yaml
+```
+
+Fetch recent logs:
+
+```bash
+deploy/scripts/logs.sh \
+  --context infra:opensource-aws-use1-prod-54 \
+  -n serge \
+  --since 2h \
+  --grep 'error|traceback|crashed|HTTPError'
+```
+
+Print only the latest error block:
+
+```bash
+deploy/scripts/logs.sh \
+  --context infra:opensource-aws-use1-prod-54 \
+  -n serge \
+  --since 2h \
+  --last-error
+```
+
+## Notes
+
+- The production image is published to GHCR as `ghcr.io/huggingface/serge`.
+- `HELPER_SANDBOX=require` needs nodes that allow unprivileged user namespaces.
+  Set it to `auto` or `off` in `envVars` if the cluster cannot support that.
+- Avoid `kubectl apply` for filled Secret manifests long-term: it can store
+  plaintext Secret values in the `last-applied-configuration` annotation. The
+  helper strips that annotation after applying a Secret file.
diff --git a/deploy/helm/Chart.yaml b/deploy/helm/Chart.yaml
new file mode 100644
index 0000000..8862e56
--- /dev/null
+++ b/deploy/helm/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: serge
+version: 0.1.0
+type: application
+description: serge — GitHub PR reviewer web app (reviewbot-web)
+icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
diff --git a/deploy/helm/env/prod.yaml b/deploy/helm/env/prod.yaml
new file mode 100644
index 0000000..6bc5a66
--- /dev/null
+++ b/deploy/helm/env/prod.yaml
@@ -0,0 +1,77 @@
+# Values for the serge deployment on the open-source EKS cluster
+# (opensource-aws-use1-prod-54), namespace chosen at install time.
+# Deploy: deploy/scripts/deploy.sh -n serge -f deploy/helm/env/prod.yaml
+#
+# Image is public on GHCR (ghcr.io/huggingface/serge) so no imagePullSecret
+# is needed. The ACM cert for serge.huggingface.tech already exists in this
+# account (infra 05-eks-utils) and is auto-discovered by the ALB controller
+# via the ingress host; external-dns creates the DNS record from the ingress.
+
+image:
+  # Bump to a newer sha-<short-sha> tag (pushed by CI on merge to main) or use latest.
+  tag: sha-33be7c5
+
+replicas: 1
+
+# Sensitive env: create this Secret in the target namespace beforehand
+# (GITHUB_APP_ID, GITHUB_PRIVATE_KEY, GITHUB_WEBHOOK_SECRET,
+# GITHUB_OAUTH_CLIENT_ID, GITHUB_OAUTH_CLIENT_SECRET, WEB_SESSION_SECRET,
+# LLM_API_KEY, ...).
+existingSecret: serge-secrets
+
+persistence:
+  enabled: true
+  size: 5Gi
+  # "" = cluster default StorageClass (EBS gp3 on this cluster).
+  storageClass: ""
+
+ingress:
+  enabled: true
+  host: serge.huggingface.tech
+  path: /
+  className: "alb"
+  annotations:
+    # Internal ALB: reachable from the internal network / VPN only, NOT the
+    # public internet. (Note: inbound GitHub App webhooks cannot reach an
+    # internal ALB — fine for the OAuth web flow, which redirects in the
+    # user's browser.)
+    alb.ingress.kubernetes.io/scheme: "internal"
+    alb.ingress.kubernetes.io/target-type: "ip"
+    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
+    alb.ingress.kubernetes.io/ssl-redirect: "443"
+    alb.ingress.kubernetes.io/healthcheck-path: "/healthz"
+    alb.ingress.kubernetes.io/tags: "Env=prod,Project=serge"
+
+# Non-secret runtime config.
+envVars:
+  PORT: "8080"
+  LOG_LEVEL: "INFO"
+  LLM_API_BASE: "https://router.huggingface.co/v1"
+  LLM_MODEL: "moonshotai/Kimi-K2.6"
+  LLM_STREAM: "1"
+  LLM_BILL_TO: "huggingface"
+  # Kimi can spend a substantial part of the completion budget on reasoning.
+  # Keep enough room for long tool-heavy reviews and the final JSON review.
+  LLM_MAX_TOKENS: "49152"
+  TOOL_MAX_ITERATIONS: "30"
+  TASK_LLM_MAX_TOKENS: "49152"
+  TASK_TOOL_MAX_ITERATIONS: "30"
+  MENTION_TRIGGER: "@askserge"
+  REVIEW_EVENT: "COMMENT"
+  TASK_API_ENABLED: "1"
+  # Refuse to run PR-author subprocesses unless bubblewrap is available.
+  # Requires the node to allow unprivileged user namespaces; if pods fail
+  # to run reviews, switch to "auto".
+  HELPER_SANDBOX: "require"
+  DEV_NO_AUTH: "0"
+  WEB_ALLOWED_ORG: "huggingface"
+  GITHUB_OAUTH_CALLBACK_URL: "https://serge.huggingface.tech/auth/callback"
+  WEB_GITHUB_APP_URL: "https://github.com/apps/sergereview"
+
+resources:
+  requests:
+    cpu: 500m
+    memory: 512Mi
+  limits:
+    cpu: "1"
+    memory: 1Gi
diff --git a/deploy/helm/serge-secrets.example.yaml b/deploy/helm/serge-secrets.example.yaml
new file mode 100644
index 0000000..77b3d63
--- /dev/null
+++ b/deploy/helm/serge-secrets.example.yaml
@@ -0,0 +1,35 @@
+# serge-secrets — sensitive env injected into the pod via envFrom (the chart
+# references this by name through `existingSecret: serge-secrets`). The keys
+# MUST be the exact env var names serge reads.
+#
+# Usage: copy to serge-secrets.yaml (git-ignored), fill in the values, then:
+#   deploy/scripts/deploy.sh -n <namespace> --secret-file deploy/helm/serge-secrets.yaml
+# Do NOT commit the filled-in file.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: serge-secrets
+  # namespace: <namespace>  # or pass -n to kubectl apply
+type: Opaque
+stringData:
+  # --- GitHub App (required: serge publishes reviews via the App) ---
+  GITHUB_APP_ID: "123456"
+  # Inline PEM. Paste the full key, indented under the block scalar.
+  GITHUB_PRIVATE_KEY: |
+    -----BEGIN RSA PRIVATE KEY-----
+    REPLACE_WITH_THE_GITHUB_APP_PRIVATE_KEY
+    -----END RSA PRIVATE KEY-----
+
+  # --- Web auth (required when DEV_NO_AUTH=0, which prod uses) ---
+  GITHUB_OAUTH_CLIENT_ID: "Iv1.xxxxxxxxxxxx"
+  GITHUB_OAUTH_CLIENT_SECRET: "replace-me"
+  # Strong random secret: openssl rand -hex 32
+  WEB_SESSION_SECRET: "replace-me"
+
+  # --- LLM (optional in web mode: per-repo keys can live in the DB; set a
+  # default here if you want one) ---
+  LLM_API_KEY: "hf_or_sk-..."
+
+  # --- Optional: only needed if you wire up inbound GitHub App webhooks.
+  # An internal ALB can't receive them, so this is usually unused here. ---
+  # GITHUB_WEBHOOK_SECRET: "replace-me"
diff --git a/deploy/helm/templates/_helpers.tpl b/deploy/helm/templates/_helpers.tpl
new file mode 100644
index 0000000..3c4230e
--- /dev/null
+++ b/deploy/helm/templates/_helpers.tpl
@@ -0,0 +1,14 @@
+{{- define "name" -}}
+{{- default $.Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{- define "app.name" -}}
+serge
+{{- end -}}
+
+{{- define "labels.standard" -}}
+release: {{ $.Release.Name | quote }}
+heritage: {{ $.Release.Service | quote }}
+chart: "{{ include "name" . }}"
+app: "{{ include "app.name" . }}"
+{{- end -}}
diff --git a/deploy/helm/templates/config.yaml b/deploy/helm/templates/config.yaml
new file mode 100644
index 0000000..c4c803e
--- /dev/null
+++ b/deploy/helm/templates/config.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  labels: {{ include "labels.standard" . | nindent 4 }}
+  name: {{ include "name" . }}
+  namespace: {{ .Release.Namespace }}
+data:
+  {{- range $key, $value := $.Values.envVars }}
+  {{ $key }}: {{ $value | quote }}
+  {{- end }}
diff --git a/deploy/helm/templates/deployment.yaml b/deploy/helm/templates/deployment.yaml
new file mode 100644
index 0000000..7bc3441
--- /dev/null
+++ b/deploy/helm/templates/deployment.yaml
@@ -0,0 +1,89 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels: {{ include "labels.standard" . | nindent 4 }}
+  name: {{ include "name" . }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  progressDeadlineSeconds: 600
+  replicas: {{ .Values.replicas }}
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels: {{ include "labels.standard" . | nindent 6 }}
+  strategy:
+    # The embedded SQLite store is a single shared writer; never run two
+    # pods against the same RWO volume during a rollout.
+    type: Recreate
+  template:
+    metadata:
+      labels: {{ include "labels.standard" . | nindent 8 }}
+      annotations:
+        checksum/config: {{ include (print $.Template.BasePath "/config.yaml") . | sha256sum }}
+    spec:
+      {{- if .Values.serviceAccount.create }}
+      serviceAccountName: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
+      {{- end }}
+      {{- with .Values.podSecurityContext }}
+      securityContext: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ include "app.name" . }}
+          image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- with .Values.securityContext }}
+          securityContext: {{ toYaml . | nindent 12 }}
+          {{- end }}
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: {{ .Values.containerPort }}
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: {{ .Values.containerPort }}
+            initialDelaySeconds: 10
+            periodSeconds: 15
+          ports:
+            - containerPort: {{ .Values.containerPort }}
+              name: http
+              protocol: TCP
+          resources: {{ toYaml .Values.resources | nindent 12 }}
+          env:
+            # Persist the SQLite job store on the mounted volume.
+            - name: WEB_STORE_PATH
+              value: "{{ .Values.persistence.mountPath }}/jobs.db"
+            {{- with .Values.extraEnv }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "name" . }}
+            {{- if .Values.existingSecret }}
+            - secretRef:
+                name: {{ .Values.existingSecret }}
+            {{- end }}
+            {{- with .Values.extraEnvFrom }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          volumeMounts:
+            - name: data
+              mountPath: {{ .Values.persistence.mountPath }}
+      volumes:
+        - name: data
+          {{- if .Values.persistence.enabled }}
+          persistentVolumeClaim:
+            claimName: {{ include "name" . }}-data
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity: {{ toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deploy/helm/templates/ingress.yaml b/deploy/helm/templates/ingress.yaml
new file mode 100644
index 0000000..3b76022
--- /dev/null
+++ b/deploy/helm/templates/ingress.yaml
@@ -0,0 +1,27 @@
+{{- if .Values.ingress.enabled }}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }}
+  labels: {{ include "labels.standard" . | nindent 4 }}
+  name: {{ include "name" . }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  {{- if .Values.ingress.className }}
+  ingressClassName: {{ .Values.ingress.className }}
+  {{- end }}
+  {{- with .Values.ingress.tls }}
+  tls: {{ toYaml . | nindent 4 }}
+  {{- end }}
+  rules:
+    - host: {{ required "ingress.host is required when ingress.enabled" .Values.ingress.host | quote }}
+      http:
+        paths:
+          - backend:
+              service:
+                name: {{ include "name" . }}
+                port:
+                  name: http
+            path: {{ .Values.ingress.path | default "/" }}
+            pathType: {{ .Values.ingress.pathType | default "Prefix" }}
+{{- end }}
diff --git a/deploy/helm/templates/pvc.yaml b/deploy/helm/templates/pvc.yaml
new file mode 100644
index 0000000..f1954b8
--- /dev/null
+++ b/deploy/helm/templates/pvc.yaml
@@ -0,0 +1,17 @@
+{{- if .Values.persistence.enabled }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "name" . }}-data
+  namespace: {{ .Release.Namespace }}
+  labels: {{ include "labels.standard" . | nindent 4 }}
+spec:
+  accessModes:
+    - {{ .Values.persistence.accessMode }}
+  resources:
+    requests:
+      storage: {{ .Values.persistence.size | quote }}
+  {{- if .Values.persistence.storageClass }}
+  storageClassName: {{ .Values.persistence.storageClass | quote }}
+  {{- end }}
+{{- end }}
diff --git a/deploy/helm/templates/service-account.yaml b/deploy/helm/templates/service-account.yaml
new file mode 100644
index 0000000..9094c42
--- /dev/null
+++ b/deploy/helm/templates/service-account.yaml
@@ -0,0 +1,12 @@
+{{- if .Values.serviceAccount.create }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: "{{ .Values.serviceAccount.name | default (include "name" .) }}"
+  namespace: {{ .Release.Namespace }}
+  labels: {{ include "labels.standard" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
diff --git a/deploy/helm/templates/service.yaml b/deploy/helm/templates/service.yaml
new file mode 100644
index 0000000..968268c
--- /dev/null
+++ b/deploy/helm/templates/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: "{{ include "name" . }}"
+  annotations: {{ toYaml .Values.service.annotations | nindent 4 }}
+  namespace: {{ .Release.Namespace }}
+  labels: {{ include "labels.standard" . | nindent 4 }}
+spec:
+  ports:
+    - name: http
+      port: {{ .Values.service.port }}
+      protocol: TCP
+      targetPort: http
+  selector: {{ include "labels.standard" . | nindent 4 }}
+  type: {{ .Values.service.type }}
diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml
new file mode 100644
index 0000000..0ec6746
--- /dev/null
+++ b/deploy/helm/values.yaml
@@ -0,0 +1,86 @@
+image:
+  repository: ghcr.io/huggingface
+  name: serge
+  tag: latest
+  pullPolicy: IfNotPresent
+
+# SQLite is a single-writer store and the embedded DB lives on one RWO
+# volume, so keep this at 1 (the Deployment also uses the Recreate strategy).
+replicas: 1
+
+# uvicorn listens here (reviewbot-web defaults to 8080).
+containerPort: 8080
+
+# Persistent volume for the embedded SQLite job store (review/task metadata,
+# drafts, provider configs). The DB file lives here so history survives pod
+# restarts. WEB_STORE_PATH is set to <mountPath>/jobs.db by the deployment.
+persistence:
+  enabled: true
+  storageClass: ""  # "" = cluster default StorageClass
+  accessMode: ReadWriteOnce
+  size: 5Gi
+  mountPath: /var/lib/reviewbot
+
+service:
+  type: ClusterIP
+  port: 80
+  annotations: {}
+
+serviceAccount:
+  create: false
+  name: ""
+  annotations: {}
+
+# Disabled by default — enable and set host/className/annotations for your
+# cluster's ingress controller (nginx, ALB, ...).
+ingress:
+  enabled: false
+  className: ""
+  host: ""
+  path: /
+  pathType: Prefix
+  annotations: {}
+  tls: []
+
+# Pod/container hardening. fsGroup lets the non-root app user (uid 1000 in
+# the image) write to the persistent volume. If your cluster forbids the
+# unprivileged user namespaces bubblewrap needs, either relax this or set
+# HELPER_SANDBOX=auto/off in envVars.
+podSecurityContext:
+  runAsNonRoot: true
+  runAsUser: 1000
+  runAsGroup: 1000
+  fsGroup: 1000
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop: ["ALL"]
+  seccompProfile:
+    type: RuntimeDefault
+
+resources:
+  requests:
+    cpu: 500m
+    memory: 512Mi
+  limits:
+    cpu: "1"
+    memory: 1Gi
+
+nodeSelector: {}
+tolerations: []
+affinity: {}
+
+# Non-secret runtime config, rendered into a ConfigMap and injected via
+# envFrom. WEB_STORE_PATH is injected separately from persistence.mountPath.
+envVars:
+  PORT: "8080"
+  LOG_LEVEL: "INFO"
+
+# Name of a pre-created Secret holding the sensitive env (GITHUB_APP_ID,
+# GITHUB_PRIVATE_KEY, GITHUB_WEBHOOK_SECRET, GITHUB_OAUTH_CLIENT_ID,
+# GITHUB_OAUTH_CLIENT_SECRET, WEB_SESSION_SECRET, LLM_API_KEY, ...). Injected
+# via envFrom. Leave empty to manage everything through envVars/extraEnv.
+existingSecret: ""
+
+extraEnv: []
+extraEnvFrom: []
diff --git a/deploy/scripts/deploy.sh b/deploy/scripts/deploy.sh
new file mode 100755
index 0000000..010e418
--- /dev/null
+++ b/deploy/scripts/deploy.sh
@@ -0,0 +1,123 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CHART_DIR="${ROOT_DIR}/deploy/helm"
+
+release="serge"
+namespace="serge"
+values_file="${CHART_DIR}/env/prod.yaml"
+secret_file=""
+expected_context=""
+dry_run=0
+
+usage() {
+  cat <<'EOF'
+Usage: deploy/scripts/deploy.sh [options]
+
+Options:
+  -n, --namespace NAME  Kubernetes namespace (default: serge)
+  -r, --release NAME    Helm release name (default: serge)
+  -f, --values FILE     Helm values file (default: deploy/helm/env/prod.yaml)
+  --secret-file FILE    Apply a local Secret manifest before deploying
+  --context NAME        Require this kubectl context before deploying
+  --dry-run             Render manifests without changing the cluster
+  -h, --help            Show this help
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -n|--namespace)
+      namespace="$2"
+      shift 2
+      ;;
+    -r|--release)
+      release="$2"
+      shift 2
+      ;;
+    -f|--values)
+      values_file="$2"
+      shift 2
+      ;;
+    --secret-file)
+      secret_file="$2"
+      shift 2
+      ;;
+    --context)
+      expected_context="$2"
+      shift 2
+      ;;
+    --dry-run)
+      dry_run=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "unknown option: $1" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+require_cmd() {
+  if ! command -v "$1" >/dev/null 2>&1; then
+    echo "missing required command: $1" >&2
+    exit 1
+  fi
+}
+
+require_cmd kubectl
+require_cmd helm
+
+if [[ ! -d "${CHART_DIR}" ]]; then
+  echo "chart directory not found: ${CHART_DIR}" >&2
+  exit 1
+fi
+
+if [[ ! -f "${values_file}" ]]; then
+  echo "values file not found: ${values_file}" >&2
+  exit 1
+fi
+
+current_context="$(kubectl config current-context)"
+if [[ -n "${expected_context}" && "${current_context}" != "${expected_context}" ]]; then
+  echo "refusing to deploy to context '${current_context}' (expected '${expected_context}')" >&2
+  exit 1
+fi
+
+echo "Context: ${current_context}"
+echo "Namespace: ${namespace}"
+echo "Release: ${release}"
+echo "Values: ${values_file}"
+
+if [[ "${dry_run}" -eq 1 ]]; then
+  helm template "${release}" "${CHART_DIR}" -n "${namespace}" -f "${values_file}"
+  exit 0
+fi
+
+kubectl get namespace "${namespace}" >/dev/null 2>&1 || kubectl create namespace "${namespace}"
+
+if [[ -n "${secret_file}" ]]; then
+  if [[ ! -f "${secret_file}" ]]; then
+    echo "secret file not found: ${secret_file}" >&2
+    exit 1
+  fi
+  kubectl apply -n "${namespace}" -f "${secret_file}"
+  # Strip the plaintext annotation from whatever objects the file defines (not
+  # a hard-coded Secret name), and warn instead of failing silently.
+  kubectl annotate -n "${namespace}" -f "${secret_file}" kubectl.kubernetes.io/last-applied-configuration- >/dev/null \
+    || echo "warning: could not strip last-applied-configuration from objects in ${secret_file}" >&2
+fi
+
+helm upgrade --install "${release}" "${CHART_DIR}" \
+  -n "${namespace}" \
+  -f "${values_file}" \
+  --wait \
+  --timeout 10m
+
+kubectl rollout status deployment/"${release}" -n "${namespace}" --timeout=10m
diff --git a/deploy/scripts/logs.sh b/deploy/scripts/logs.sh
new file mode 100755
index 0000000..639dfd6
--- /dev/null
+++ b/deploy/scripts/logs.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+namespace="serge"
+release="serge"
+since="2h"
+follow=0
+grep_pattern=""
+expected_context=""
+last_error=0
+
+usage() {
+  cat <<'EOF'
+Usage: deploy/scripts/logs.sh [options]
+
+Options:
+  -n, --namespace NAME  Kubernetes namespace (default: serge)
+  -r, --release NAME    Helm release name (default: serge)
+  --since DURATION      Log window, e.g. 30m, 2h (default: 2h)
+  -f, --follow          Follow logs
+  --grep PATTERN        Filter logs with grep -Ei
+  --last-error          Print the last ERROR/Traceback block from recent logs
+  --context NAME        Require this kubectl context before reading logs
+  -h, --help            Show this help
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -n|--namespace)
+      namespace="$2"
+      shift 2
+      ;;
+    -r|--release)
+      release="$2"
+      shift 2
+      ;;
+    --since)
+      since="$2"
+      shift 2
+      ;;
+    -f|--follow)
+      follow=1
+      shift
+      ;;
+    --grep)
+      grep_pattern="$2"
+      shift 2
+      ;;
+    --last-error)
+      last_error=1
+      shift
+      ;;
+    --context)
+      expected_context="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "unknown option: $1" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+if ! command -v kubectl >/dev/null 2>&1; then
+  echo "missing required command: kubectl" >&2
+  exit 1
+fi
+
+if [[ -n "${grep_pattern}" ]] && ! command -v grep >/dev/null 2>&1; then
+  echo "missing required command: grep" >&2
+  exit 1
+fi
+
+current_context="$(kubectl config current-context)"
+if [[ -n "${expected_context}" && "${current_context}" != "${expected_context}" ]]; then
+  echo "refusing to read logs from context '${current_context}' (expected '${expected_context}')" >&2
+  exit 1
+fi
+
+# Pods carry the fixed label app=serge plus release=<Helm release name>
+# (see labels.standard in templates/_helpers.tpl), so select on both.
+selector="app=serge,release=${release}"
+pod="$(
+  kubectl get pods -n "${namespace}" \
+    -l "${selector}" \
+    --field-selector=status.phase=Running \
+    --sort-by=.metadata.creationTimestamp \
+    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
+    | tail -n 1
+)"
+
+if [[ -z "${pod}" ]]; then
+  echo "no running pod found in namespace '${namespace}' with labels ${selector}" >&2
+  exit 1
+fi
+
+pod_started="$(kubectl get pod "${pod}" -n "${namespace}" -o jsonpath='{.status.startTime}')"
+
+echo "Context: ${current_context}" >&2
+echo "Namespace: ${namespace}" >&2
+echo "Pod: ${pod}" >&2
+echo "Pod started: ${pod_started}" >&2
+echo "Since: ${since}" >&2
+
+args=(logs -n "${namespace}" "${pod}" --since="${since}")
+if [[ "${follow}" -eq 1 ]]; then
+  args+=(-f)
+fi
+
+if [[ "${last_error}" -eq 1 ]]; then
+  if [[ "${follow}" -eq 1 ]]; then
+    echo "--last-error cannot be combined with --follow" >&2
+    exit 2
+  fi
+  kubectl "${args[@]}" | awk '
+    function flush() {
+      if (in_block && block != "") {
+        last = block
+      }
+      block = ""
+      in_block = 0
+    }
+
+    /^[0-9-]+ [0-9:,]+ ERROR / {
+      flush()
+      in_block = 1
+      block = $0 "\n"
+      next
+    }
+
+    /^Traceback \(most recent call last\):/ {
+      if (!in_block) {
+        in_block = 1
+        block = $0 "\n"
+      } else {
+        block = block $0 "\n"
+      }
+      next
+    }
+
+    in_block {
+      if ($0 ~ /^[0-9-]+ [0-9:,]+ (INFO|WARNING|ERROR|DEBUG) / || $0 ~ /^INFO:/) {
+        flush()
+      }
+    }
+
+    in_block {
+      block = block $0 "\n"
+    }
+
+    END {
+      flush()
+      if (last != "") {
+        printf "%s", last
+      } else {
+        exit 1
+      }
+    }
+  ' || {
+    echo "no ERROR/Traceback block found for pod ${pod} in the last ${since}" >&2
+    echo "note: this only covers logs retained for the current pod, which started at ${pod_started}" >&2
+    exit 1
+  }
+  exit 0
+fi
+
+if [[ -n "${grep_pattern}" ]]; then
+  kubectl "${args[@]}" | grep -Ei "${grep_pattern}"
+else
+  kubectl "${args[@]}"
+fi