Skip to content

Add demo notebook with with geographical data for various locations across the US #1066

Add demo notebook with with geographical data for various locations across the US

Add demo notebook with with geographical data for various locations across the US #1066

Workflow file for this run

# Deploy workflow.
#
# Triggers:
#   - push to main            -> staging deploy
#   - pull_request to main    -> QA deploy (gated by label / [e2e] marker)
#   - workflow_dispatch       -> manual qa/staging deploy, or production
#                                redeploy when `production_tag` is set
#   - release published       -> production deploy
name: deploy

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      region_id:
        description: "Region ID (ignored if all_region_ids is true; for non-production/manual deploy)"
        required: false
        default: "y14_x5"
        type: string
      all_region_ids:
        description: "Process all region IDs (passes --all-region-ids) for non-production/manual deploy"
        required: false
        type: boolean
        default: false
      wipe:
        description: "Wipe existing data (icechunk and parquet files) before running (non-production/manual deploy)"
        required: false
        type: boolean
        default: false
      environment:
        description: "Target environment for manual deploy (qa or staging). Ignored if production_tag is set."
        required: true
        type: choice
        options:
          - qa
          - staging
        default: qa
      production_tag:
        description: "SemVer tag to redeploy to production (e.g. v1.2.3 or 1.2.3). If set, triggers a production redeploy."
        required: false
        default: ""
        type: string
  release:
    types:
      - published

concurrency:
  # The event name is part of the group so that a push to main does not share
  # a group with (and therefore cancel) a workflow_dispatch run started from
  # main, e.g. a long-running production redeploy.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
  # Superseded PR and push runs are still cancelled; release and manually
  # dispatched runs are never interrupted mid-deploy.
  cancel-in-progress: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}

permissions:
  # id-token: write is needed for the OIDC role assumption done by
  # aws-actions/configure-aws-credentials in the deploy jobs.
  id-token: write
  contents: read

env:
  PRODUCTION_URL: https://ocr.carbonplan.org
  STAGING_URL: https://ocr.staging.carbonplan.org
  QA_URL: https://ocr.qa.carbonplan.org

jobs:
ocr-coiled-software:
  # Creates the Coiled software environment used by the deploy jobs and
  # exposes its name (ocr-<sanitized tag | PR number | ref name>) as the
  # `name` job output.
  timeout-minutes: 10
  runs-on: ubuntu-latest
  outputs:
    name: ${{ steps.compute.outputs.name }}
  steps:
    - name: Checkout source
      uses: actions/checkout@v6
      with:
        # For releases and manual production re-run, build from the tagged ref
        ref: ${{ github.event.inputs.production_tag || github.event.release.tag_name || github.sha }}
    - name: Setup Pixi (deploy env)
      uses: prefix-dev/setup-pixi@v0.9.4
      with:
        cache: false
        locked: false
        activate-environment: true
        environments: deploy
    - name: Compute Coiled software environment name
      id: compute
      shell: bash
      # Event data is handed to the script through environment variables
      # rather than being expanded into the script text: values such as the
      # PR head branch name are user-controlled and would otherwise be
      # interpreted by the shell (script injection).
      env:
        EVENT_NAME: ${{ github.event_name }}
        RELEASE_TAG: ${{ github.event.release.tag_name }}
        PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        HEAD_REF: ${{ github.head_ref }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        set -euo pipefail
        NAME_SRC=""
        if [[ "$EVENT_NAME" == "release" ]]; then
          NAME_SRC="$RELEASE_TAG"
        elif [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$PRODUCTION_TAG" ]]; then
          NAME_SRC="$PRODUCTION_TAG"
        elif [[ "$EVENT_NAME" == "pull_request" ]]; then
          NAME_SRC="$PR_NUMBER"
          # Fall back to the head branch name if the PR number is unavailable
          if [[ -z "$NAME_SRC" ]]; then
            NAME_SRC="$HEAD_REF"
          fi
        else
          NAME_SRC="$REF_NAME"
        fi
        # Lowercase and sanitize to coiled allowed charset [a-z0-9_-]
        NAME_SAN=$(printf "%s" "$NAME_SRC" | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9_-]+/-/g; s/^-+//; s/-+$//; s/-{2,}/-/g')
        if [[ -z "$NAME_SAN" ]]; then
          NAME_SAN="unnamed"
        fi
        FINAL_NAME="ocr-${NAME_SAN}"
        echo "Resolved Coiled software env name: $FINAL_NAME"
        echo "name=$FINAL_NAME" >> "$GITHUB_OUTPUT"
    - name: Export cleaned env and create Coiled software environment
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        COILED_ENV_NAME: ${{ steps.compute.outputs.name }}
      run: |
        echo "Creating Coiled software environment: $COILED_ENV_NAME"
        pixi run export-and-create-coiled
qa-pr:
  # Automatic non-production deploy for pull requests against main.
  # Runs only when the PR carries the `e2e` or `QA/QC` label, or when its
  # title or body contains `[e2e]`. Deploys a fixed set of regions to the qa
  # environment and passes --wipe on every run.
  needs: ocr-coiled-software
  runs-on: ubuntu-latest
  timeout-minutes: 30
  if: >-
    github.event_name == 'pull_request' && (
    contains(github.event.pull_request.labels.*.name, 'e2e') ||
    contains(github.event.pull_request.labels.*.name, 'QA/QC') ||
    contains(github.event.pull_request.title, '[e2e]') ||
    contains(github.event.pull_request.body, '[e2e]')
    )
  environment:
    name: qa
    url: ${{ env.QA_URL }}
  env:
    COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
  steps:
    - uses: actions/checkout@v6
    - name: configure aws credentials
      uses: aws-actions/configure-aws-credentials@v6
      with:
        aws-region: us-west-2
        role-to-assume: arn:aws:iam::631969445205:role/github-action-role
        role-session-name: ocr-etl-role-session
    - uses: prefix-dev/setup-pixi@v0.9.4
      with:
        environments: deploy
        activate-environment: true
        cache: false
        locked: false
    - run: pixi info
    - name: List installed libraries/packages
      run: pixi list --environment deploy
    - name: Deploy (automatic QA PR)
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
      run: |
        echo "Starting automatic deploy to QA (PR to main)"
        # Regions deployed for every QA PR run, one grid row per line.
        region_ids=(
          y14_x3 y14_x4 y14_x5 y14_x6 y14_x7
          y13_x3 y13_x4 y13_x5 y13_x6 y13_x7
          y12_x3 y12_x4 y12_x5 y12_x6 y12_x7
        )
        # Expand each id into a "--region-id <id>" pair, preserving order.
        region_args=()
        for region_id in "${region_ids[@]}"; do
          region_args+=(--region-id "$region_id")
        done
        pixi run --environment deploy ocr run \
          "${region_args[@]}" \
          --env-file "ocr-coiled-s3.env" \
          --platform coiled \
          --write-regional-stats \
          --wipe
staging-main:
  # Automatic non-production deploy on push to main (staging environment).
  # Deploys the explicit region list in the final step and passes --wipe.
  # NOTE(review): this job wipes staging data on every push to main and does
  # not use --all-region-ids; confirm both are intended.
  needs: ocr-coiled-software
  runs-on: ubuntu-latest
  timeout-minutes: 60
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  environment:
    name: staging
    url: ${{ env.STAGING_URL }}
  env:
    COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
  steps:
    - uses: actions/checkout@v6
    - name: configure aws credentials
      uses: aws-actions/configure-aws-credentials@v6
      with:
        aws-region: us-west-2
        role-to-assume: arn:aws:iam::631969445205:role/github-action-role
        role-session-name: ocr-etl-role-session
    - uses: prefix-dev/setup-pixi@v0.9.4
      with:
        activate-environment: true
        cache: false
        locked: false
    - run: pixi info
    - name: List installed libraries/packages
      run: pixi list
    - name: Ensure OCR_ENVIRONMENT in env file (staging)
      env:
        ENV_FILE: ocr-coiled-s3-staging.env
      run: |
        set -euo pipefail
        # Fail fast when the checked-in env file is missing.
        [[ -f "$ENV_FILE" ]] || { echo "Env file not found: $ENV_FILE" >&2; exit 1; }
        echo "Updating $ENV_FILE with OCR_ENVIRONMENT=staging"
        # Log the matching line before and after; a missing line is not an error.
        echo "Before update (matching lines):"
        grep -E '^OCR_ENVIRONMENT[[:space:]]*=' "$ENV_FILE" || true
        pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_ENVIRONMENT=staging
        echo "After update (matching lines):"
        grep -E '^OCR_ENVIRONMENT[[:space:]]*=' "$ENV_FILE" || true
    - name: Deploy (automatic staging main)
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
      run: |
        echo "Starting automatic deploy to staging (main branch push)"
        # Regions deployed to staging, grouped as in the original list.
        region_ids=(
          y14_x3 y14_x4 y14_x5 y14_x6 y14_x7
          y13_x3 y13_x4 y13_x5 y13_x6 y13_x7
          y12_x3 y12_x4 y12_x5 y12_x6 y12_x7
          y8_x3 y8_x4 y8_x5 y8_x6
          y7_x4 y7_x5 y7_x6 y7_x7 y7_x8
          y6_x5 y6_x6 y6_x7 y6_x8 y6_x9
          y5_x7 y5_x8 y5_x9
          y9_x14 y9_x15 y9_x16 y9_x17
          y8_x14 y8_x15 y8_x16 y8_x17
          y7_x31 y7_x32
          y13_x24 y13_x25 y13_x26
          y12_x24 y12_x25
        )
        # Expand each id into a "--region-id <id>" pair, preserving order.
        region_args=()
        for region_id in "${region_ids[@]}"; do
          region_args+=(--region-id "$region_id")
        done
        pixi run ocr run \
          "${region_args[@]}" \
          --env-file "ocr-coiled-s3-staging.env" \
          --platform coiled \
          --write-regional-stats \
          --create-pyramid \
          --wipe
manual:
  # Regular manual (non-production) deploy path: workflow_dispatch with an
  # empty `production_tag`. Targets the qa or staging environment chosen in
  # the dispatch form.
  timeout-minutes: 60
  needs: ocr-coiled-software
  if: github.event_name == 'workflow_dispatch' && github.event.inputs.production_tag == ''
  environment:
    name: ${{ github.event.inputs.environment }}
    url: ${{ github.event.inputs.environment == 'staging' && env.STAGING_URL || env.QA_URL }}
  runs-on: ubuntu-latest
  env:
    COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
  steps:
    - uses: actions/checkout@v6
    - name: configure aws credentials
      uses: aws-actions/configure-aws-credentials@v6
      with:
        role-to-assume: arn:aws:iam::631969445205:role/github-action-role
        role-session-name: ocr-etl-role-session
        aws-region: us-west-2
    - uses: prefix-dev/setup-pixi@v0.9.4
      with:
        cache: false
        locked: false
        activate-environment: true
    - run: pixi info
    - name: List installed libraries/packages
      # NOTE(review): the other jobs list the default/deploy environment;
      # confirm that listing the `dev` environment here is intentional.
      run: |
        pixi list -e dev
    - name: Set OCR_ENVIRONMENT
      # Inputs reach the script via env vars instead of being expanded into
      # the script text, so their content is never parsed as shell code.
      env:
        ENV_INPUT: ${{ github.event.inputs.environment }}
      run: |
        # (Defensive) validate even though choices enforce it
        if [[ "$ENV_INPUT" != "qa" && "$ENV_INPUT" != "staging" ]]; then
          echo "Invalid environment: $ENV_INPUT (expected qa or staging)" >&2
          exit 1
        fi
        echo "Setting OCR_ENVIRONMENT=$ENV_INPUT"
        echo "OCR_ENVIRONMENT=$ENV_INPUT" >> "$GITHUB_ENV"
    - name: Show environment
      run: |
        echo "OCR_ENVIRONMENT is $OCR_ENVIRONMENT"
    - name: Deploy (manual)
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
        # Dispatch inputs; `region_id` is free-form text, so it must not be
        # interpolated directly into the script below.
        ALL_REGION_IDS: ${{ github.event.inputs.all_region_ids }}
        REGION_ID: ${{ github.event.inputs.region_id }}
        WIPE: ${{ github.event.inputs.wipe }}
      run: |
        echo "Starting manual deploy to environment: $OCR_ENVIRONMENT"
        REGION_ARGS=""
        if [[ "$ALL_REGION_IDS" == "true" ]]; then
          REGION_ARGS="--all-region-ids"
          if [[ -n "$REGION_ID" ]]; then
            echo "Note: all_region_ids=true, ignoring region_id='$REGION_ID'."
          fi
        elif [[ -n "$REGION_ID" ]]; then
          REGION_ARGS="--region-id $REGION_ID"
        else
          echo "Error: Provide 'region_id' or set 'all_region_ids' to true." >&2
          exit 1
        fi
        WIPE_ARG=""
        if [[ "$WIPE" == "true" ]]; then
          WIPE_ARG="--wipe"
        fi
        # Select env file based on OCR_ENVIRONMENT
        if [[ "$OCR_ENVIRONMENT" == "staging" ]]; then
          ENV_FILE="ocr-coiled-s3-staging.env"
        else
          ENV_FILE="ocr-coiled-s3.env"
        fi
        # Ensure OCR_ENVIRONMENT is set correctly in the selected env file
        if [[ ! -f "$ENV_FILE" ]]; then
          echo "Env file not found: $ENV_FILE" >&2
          exit 1
        fi
        export ENV_FILE
        pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
        # REGION_ARGS and WIPE_ARG are intentionally unquoted: they hold zero
        # or more whitespace-separated CLI arguments.
        pixi run ocr run \
          $REGION_ARGS \
          --env-file "$ENV_FILE" \
          $WIPE_ARG \
          --write-regional-stats \
          --create-pyramid \
          --platform coiled
production:
  # Automatic production deploy on release publish. Derives OCR_VERSION from
  # the release tag (must be SemVer, optional leading "v"), writes it and
  # OCR_ENVIRONMENT=production into the production env file, then runs all
  # regions.
  timeout-minutes: 240
  needs: ocr-coiled-software
  if: github.event_name == 'release'
  environment:
    name: production
    url: ${{ env.PRODUCTION_URL }}
  runs-on: ubuntu-latest
  env:
    COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
  steps:
    - uses: actions/checkout@v6
    - name: configure aws credentials
      uses: aws-actions/configure-aws-credentials@v6
      with:
        role-to-assume: arn:aws:iam::631969445205:role/github-action-role
        role-session-name: ocr-etl-role-session
        aws-region: us-west-2
        # NOTE(review): 12000 s is 200 minutes, shorter than this job's
        # 240-minute timeout; confirm credentials cannot expire mid-run.
        role-duration-seconds: 12000
    - uses: prefix-dev/setup-pixi@v0.9.4
      with:
        cache: false
        locked: false
        activate-environment: true
    - run: pixi info
    - name: List installed libraries/packages
      run: |
        pixi list
    - name: Set OCR_VERSION from release tag
      id: set_version
      # The tag name is passed via env rather than expanded into the script
      # text, so its content is never parsed as shell code.
      env:
        RELEASE_TAG: ${{ github.event.release.tag_name }}
      run: |
        RAW_TAG="$RELEASE_TAG"
        # Fall back to the ref name when the event payload carries no tag
        if [[ -z "$RAW_TAG" ]]; then
          RAW_TAG="${GITHUB_REF_NAME:-${GITHUB_REF#refs/tags/}}"
        fi
        # Normalize: drop a "refs/tags/" prefix and a leading "v"
        CLEAN="${RAW_TAG#refs/tags/}"
        CLEAN="${CLEAN#v}"
        SEMVER_REGEX='^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$'
        if [[ ! "$CLEAN" =~ $SEMVER_REGEX ]]; then
          echo "Normalized tag '$CLEAN' (from '$RAW_TAG') is not valid SemVer." >&2
          exit 1
        fi
        echo "OCR_VERSION=$CLEAN" >> "$GITHUB_ENV"
        echo "ocr_version=$CLEAN" >> "$GITHUB_OUTPUT"
        echo "OCR_ENVIRONMENT=production" >> "$GITHUB_ENV"
    - name: Show resolved version
      run: |
        echo "OCR_VERSION: $OCR_VERSION"
        echo "OCR_ENVIRONMENT: $OCR_ENVIRONMENT"
    - name: Inject OCR_VERSION and OCR_ENVIRONMENT into env file (production)
      env:
        ENV_FILE: ocr-coiled-s3-production.env
      run: |
        set -euo pipefail
        if [[ ! -f "$ENV_FILE" ]]; then
          echo "Env file not found: $ENV_FILE" >&2
          exit 1
        fi
        echo "Updating $ENV_FILE with OCR_VERSION=$OCR_VERSION and OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
        echo "Before update (matching lines):"
        grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
        pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_VERSION="$OCR_VERSION" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
        echo "After update (matching lines):"
        grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
    - name: Deploy (production all regions)
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
        OCR_ALLOW_ALL_PROCESSED: "true"
      run: |
        # Print the final env file to the job log for traceability
        cat ocr-coiled-s3-production.env
        echo "Running production deployment for tag: $OCR_VERSION (environment=$OCR_ENVIRONMENT)"
        REGION_ARGS="--all-region-ids"
        pixi run ocr run \
          $REGION_ARGS \
          --env-file ocr-coiled-s3-production.env \
          --write-regional-stats \
          --create-pyramid \
          --platform coiled
production-rerun:
  # Manual production redeploy via workflow_dispatch + production_tag input.
  # Checks out the given tag, validates that it normalizes to SemVer, writes
  # OCR_VERSION / OCR_ENVIRONMENT=production into the production env file and
  # runs all regions.
  timeout-minutes: 240
  needs: ocr-coiled-software
  if: github.event_name == 'workflow_dispatch' && github.event.inputs.production_tag != ''
  environment:
    name: production
    url: ${{ env.PRODUCTION_URL }}
  runs-on: ubuntu-latest
  env:
    COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
  steps:
    - name: Validate provided tag format (basic pre-check)
      id: validate_input
      # The free-form tag input is passed via env in every script of this job
      # so that its content is never parsed as shell code.
      env:
        PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
      run: |
        INPUT_TAG="$PRODUCTION_TAG"
        if [[ -z "$INPUT_TAG" ]]; then
          echo "No production_tag provided." >&2
          exit 1
        fi
        echo "Provided production_tag: $INPUT_TAG"
    - name: Checkout specified tag
      uses: actions/checkout@v6
      with:
        ref: ${{ github.event.inputs.production_tag }}
        # Full history including tags, needed by `git tag --points-at HEAD`
        fetch-depth: 0
    - name: configure aws credentials
      uses: aws-actions/configure-aws-credentials@v6
      with:
        role-to-assume: arn:aws:iam::631969445205:role/github-action-role
        role-session-name: ocr-etl-role-session
        aws-region: us-west-2
        # NOTE(review): 12000 s is 200 minutes, shorter than this job's
        # 240-minute timeout; confirm credentials cannot expire mid-run.
        role-duration-seconds: 12000
    - uses: prefix-dev/setup-pixi@v0.9.4
      with:
        cache: false
        locked: false
        activate-environment: true
    - run: pixi info
    - name: List installed libraries/packages
      run: |
        pixi list
    - name: Derive & validate OCR_VERSION from input tag
      id: set_version
      env:
        PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
      run: |
        RAW_TAG="$PRODUCTION_TAG"
        # Normalize: drop a "refs/tags/" prefix and a leading "v"
        CLEAN="${RAW_TAG#refs/tags/}"
        CLEAN="${CLEAN#v}"
        SEMVER_REGEX='^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$'
        if [[ ! "$CLEAN" =~ $SEMVER_REGEX ]]; then
          echo "Provided tag '$RAW_TAG' normalizes to '$CLEAN' which is not valid SemVer." >&2
          exit 1
        fi
        # Fixed-string, whole-line match: the tag contains regex
        # metacharacters (".", "+") that must not be treated as patterns.
        CURRENT_TAG_MATCH="$(git tag --points-at HEAD | grep -Fx -e "$RAW_TAG" -e "v${CLEAN}" -e "$CLEAN" || true)"
        if [[ -z "$CURRENT_TAG_MATCH" ]]; then
          echo "Warning: HEAD does not appear to match the provided tag '$RAW_TAG' (continuing anyway)." >&2
        fi
        echo "OCR_VERSION=$CLEAN" >> "$GITHUB_ENV"
        echo "ocr_version=$CLEAN" >> "$GITHUB_OUTPUT"
        echo "OCR_ENVIRONMENT=production" >> "$GITHUB_ENV"
    - name: Show resolved version
      env:
        PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
      run: |
        echo "Redeploying production for OCR_VERSION=$OCR_VERSION (tag input=$PRODUCTION_TAG)"
        echo "OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
    - name: Inject OCR_VERSION and OCR_ENVIRONMENT into env file (production rerun)
      env:
        ENV_FILE: ocr-coiled-s3-production.env
      run: |
        set -euo pipefail
        if [[ ! -f "$ENV_FILE" ]]; then
          echo "Env file not found: $ENV_FILE" >&2
          exit 1
        fi
        echo "Updating $ENV_FILE with OCR_VERSION=$OCR_VERSION and OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
        echo "Before update (matching lines):"
        grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
        pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_VERSION="$OCR_VERSION" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
        echo "After update (matching lines):"
        grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
    - name: Deploy (production redeploy all regions)
      env:
        DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
        OCR_ALLOW_ALL_PROCESSED: "true"
      run: |
        # Print the final env file to the job log for traceability
        cat ocr-coiled-s3-production.env
        REGION_ARGS="--all-region-ids"
        pixi run ocr run \
          $REGION_ARGS \
          --env-file ocr-coiled-s3-production.env \
          --write-regional-stats \
          --create-pyramid \
          --platform coiled