Add demo notebook with geographical data for various locations across the US #1066
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy workflow: runs on pushes and pull requests targeting main, on manual
# dispatch, and when a release is published.
name: deploy

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  # Manual runs: either a non-production deploy (qa/staging) or, when
  # production_tag is set, a production redeploy of that tag.
  workflow_dispatch:
    inputs:
      region_id:
        description: 'Region ID (ignored if all_region_ids is true; for non-production/manual deploy)'
        required: false
        default: 'y14_x5'
        type: string
      all_region_ids:
        description: 'Process all region IDs (passes --all-region-ids) for non-production/manual deploy'
        required: false
        type: boolean
        default: false
      wipe:
        description: 'Wipe existing data (icechunk and parquet files) before running (non-production/manual deploy)'
        required: false
        type: boolean
        default: false
      environment:
        description: 'Target environment for manual deploy (qa or staging). Ignored if production_tag is set.'
        required: true
        type: choice
        options: [qa, staging]
        default: qa
      production_tag:
        description: 'SemVer tag to redeploy to production (e.g. v1.2.3 or 1.2.3). If set, triggers a production redeploy.'
        required: false
        default: ''
        type: string
  release:
    types: [published]
# One run at a time per workflow + ref; later runs on the same ref queue up.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  # Only superseded pull-request runs are cancelled. Push, release and manual
  # dispatch runs are deploys (some with --wipe); cancelling one midway because
  # another run started on the same ref could leave the target half-written,
  # so those runs wait instead.
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
# Token permissions granted to every job in this workflow.
permissions:
  # Read-only access to repository contents (checkout).
  contents: read
  # Allows jobs to request an OIDC token; the deploy jobs assume an AWS role
  # through aws-actions/configure-aws-credentials.
  id-token: write
# Public URLs shown on the GitHub deployment environments used by the jobs.
env:
  QA_URL: 'https://ocr.qa.carbonplan.org'
  STAGING_URL: 'https://ocr.staging.carbonplan.org'
  PRODUCTION_URL: 'https://ocr.carbonplan.org'
jobs:
  # Computes a sanitized Coiled software environment name for this run and
  # creates that environment; every deploy job below depends on its output.
  ocr-coiled-software:
    timeout-minutes: 10
    runs-on: ubuntu-latest
    outputs:
      name: ${{ steps.compute.outputs.name }}
    steps:
      - name: Checkout source
        uses: actions/checkout@v6
        with:
          # For releases and manual production re-run, build from the tagged ref
          ref: ${{ github.event.inputs.production_tag || github.event.release.tag_name || github.sha }}
      - name: Setup Pixi (deploy env)
        uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
          environments: deploy
      - name: Compute Coiled software environment name
        id: compute
        shell: bash
        # Context values (branch names, tags, dispatch inputs) can be chosen by
        # whoever triggers the run. They are passed through env vars instead of
        # being interpolated into the script text, so they are only ever
        # treated as data by the shell.
        env:
          EVENT: ${{ github.event_name }}
          RELEASE_TAG: ${{ github.event.release.tag_name }}
          PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          HEAD_REF: ${{ github.head_ref }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          set -euo pipefail
          NAME_SRC=""
          if [[ "$EVENT" == "release" ]]; then
            NAME_SRC="${RELEASE_TAG:-}"
          elif [[ "$EVENT" == "workflow_dispatch" && -n "${PRODUCTION_TAG:-}" ]]; then
            NAME_SRC="${PRODUCTION_TAG:-}"
          elif [[ "$EVENT" == "pull_request" ]]; then
            NAME_SRC="${PR_NUMBER:-}"
            if [[ -z "$NAME_SRC" ]]; then
              NAME_SRC="${HEAD_REF:-}"
            fi
          else
            NAME_SRC="${REF_NAME:-}"
          fi
          # Lowercase and sanitize to coiled allowed charset [a-z0-9_-]
          NAME_SAN=$(printf "%s" "$NAME_SRC" | tr '[:upper:]' '[:lower:]' | sed -E 's/[^a-z0-9_-]+/-/g; s/^-+//; s/-+$//; s/-{2,}/-/g')
          if [[ -z "$NAME_SAN" ]]; then
            NAME_SAN="unnamed"
          fi
          FINAL_NAME="ocr-${NAME_SAN}"
          echo "Resolved Coiled software env name: $FINAL_NAME"
          echo "name=$FINAL_NAME" >> "$GITHUB_OUTPUT"
      - name: Export cleaned env and create Coiled software environment
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
          COILED_ENV_NAME: ${{ steps.compute.outputs.name }}
        run: |
          echo "Creating Coiled software environment: $COILED_ENV_NAME"
          pixi run export-and-create-coiled

  qa-pr:
    timeout-minutes: 30
    needs: ocr-coiled-software
    # Automatic non-production deploy on PR to main (qa env, limited regions, wipe each time).
    # Runs only when the PR carries the 'e2e' or 'QA/QC' label, or its title/body contains '[e2e]'.
    if: github.event_name == 'pull_request' && (
      (
        contains(github.event.pull_request.labels.*.name, 'e2e') ||
        contains(github.event.pull_request.labels.*.name, 'QA/QC')
      ) ||
      (
        contains(github.event.pull_request.title, '[e2e]') ||
        contains(github.event.pull_request.body, '[e2e]')
      )
      )
    environment:
      name: qa
      url: ${{ env.QA_URL }}
    runs-on: ubuntu-latest
    env:
      COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
    steps:
      - uses: actions/checkout@v6
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::631969445205:role/github-action-role
          role-session-name: ocr-etl-role-session
          aws-region: us-west-2
      - uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
          environments: deploy
      - run: pixi info
      - name: List installed libraries/packages
        run: |
          pixi list --environment deploy
      - name: Deploy (automatic QA PR)
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        run: |
          echo "Starting automatic deploy to QA (PR to main)"
          # Expanded unquoted below on purpose: the shell splits it into separate flags.
          REGION_ARGS="--region-id y14_x3 --region-id y14_x4 --region-id y14_x5 --region-id y14_x6 --region-id y14_x7
            --region-id y13_x3 --region-id y13_x4 --region-id y13_x5 --region-id y13_x6 --region-id y13_x7
            --region-id y12_x3 --region-id y12_x4 --region-id y12_x5 --region-id y12_x6 --region-id y12_x7"
          ENV_FILE="ocr-coiled-s3.env"
          pixi run --environment deploy ocr run \
            $REGION_ARGS \
            --env-file "$ENV_FILE" \
            --platform coiled \
            --write-regional-stats \
            --wipe

  staging-main:
    timeout-minutes: 60
    needs: ocr-coiled-software
    # Automatic non-production deploy on push to main (staging env, fixed region list, wipe each time)
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    environment:
      name: staging
      url: ${{ env.STAGING_URL }}
    runs-on: ubuntu-latest
    env:
      COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
    steps:
      - uses: actions/checkout@v6
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::631969445205:role/github-action-role
          role-session-name: ocr-etl-role-session
          aws-region: us-west-2
      - uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
      - run: pixi info
      - name: List installed libraries/packages
        run: |
          pixi list
      - name: Ensure OCR_ENVIRONMENT in env file (staging)
        env:
          ENV_FILE: ocr-coiled-s3-staging.env
        run: |
          set -euo pipefail
          if [[ ! -f "$ENV_FILE" ]]; then
            echo "Env file not found: $ENV_FILE" >&2
            exit 1
          fi
          echo "Updating $ENV_FILE with OCR_ENVIRONMENT=staging"
          echo "Before update (matching lines):"
          grep -E '^OCR_ENVIRONMENT[[:space:]]*=' "$ENV_FILE" || true
          pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_ENVIRONMENT=staging
          echo "After update (matching lines):"
          grep -E '^OCR_ENVIRONMENT[[:space:]]*=' "$ENV_FILE" || true
      - name: Deploy (automatic staging main)
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
        run: |
          echo "Starting automatic deploy to staging (main branch push)"
          # Expanded unquoted below on purpose: the shell splits it into separate flags.
          REGION_ARGS="--region-id y14_x3 --region-id y14_x4 --region-id y14_x5 --region-id y14_x6 --region-id y14_x7
            --region-id y13_x3 --region-id y13_x4 --region-id y13_x5 --region-id y13_x6 --region-id y13_x7
            --region-id y12_x3 --region-id y12_x4 --region-id y12_x5 --region-id y12_x6 --region-id y12_x7
            --region-id y8_x3 --region-id y8_x4 --region-id y8_x5 --region-id y8_x6
            --region-id y7_x4 --region-id y7_x5 --region-id y7_x6 --region-id y7_x7 --region-id y7_x8
            --region-id y6_x5 --region-id y6_x6 --region-id y6_x7 --region-id y6_x8 --region-id y6_x9
            --region-id y5_x7 --region-id y5_x8 --region-id y5_x9
            --region-id y9_x14 --region-id y9_x15 --region-id y9_x16 --region-id y9_x17
            --region-id y8_x14 --region-id y8_x15 --region-id y8_x16 --region-id y8_x17
            --region-id y7_x31 --region-id y7_x32
            --region-id y13_x24 --region-id y13_x25 --region-id y13_x26
            --region-id y12_x24 --region-id y12_x25"
          ENV_FILE="ocr-coiled-s3-staging.env"
          pixi run ocr run \
            $REGION_ARGS \
            --env-file "$ENV_FILE" \
            --platform coiled \
            --write-regional-stats \
            --create-pyramid \
            --wipe

  manual:
    timeout-minutes: 60
    needs: ocr-coiled-software
    # Regular manual (non-production) deploy path
    if: github.event_name == 'workflow_dispatch' && github.event.inputs.production_tag == ''
    environment:
      name: ${{ github.event.inputs.environment }}
      url: ${{ github.event.inputs.environment == 'staging' && env.STAGING_URL || env.QA_URL }}
    runs-on: ubuntu-latest
    env:
      COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
    steps:
      - uses: actions/checkout@v6
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::631969445205:role/github-action-role
          role-session-name: ocr-etl-role-session
          aws-region: us-west-2
      - uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
      - run: pixi info
      - name: List installed libraries/packages
        run: |
          pixi list -e dev
      - name: Set OCR_ENVIRONMENT
        # The dispatch input reaches the script as an env var, never as script text.
        env:
          ENV_INPUT: ${{ github.event.inputs.environment }}
        run: |
          # (Defensive) validate even though choices enforce it
          if [[ "$ENV_INPUT" != "qa" && "$ENV_INPUT" != "staging" ]]; then
            echo "Invalid environment: $ENV_INPUT (expected qa or staging)" >&2
            exit 1
          fi
          echo "Setting OCR_ENVIRONMENT=$ENV_INPUT"
          echo "OCR_ENVIRONMENT=$ENV_INPUT" >> "$GITHUB_ENV"
      - name: Show environment
        run: |
          echo "OCR_ENVIRONMENT is $OCR_ENVIRONMENT"
      - name: Deploy (manual)
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
          OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
          # Dispatch inputs are passed as env vars so free-form text (region_id)
          # cannot alter the script itself.
          ALL_REGION_IDS: ${{ github.event.inputs.all_region_ids }}
          REGION_ID: ${{ github.event.inputs.region_id }}
          WIPE: ${{ github.event.inputs.wipe }}
        run: |
          echo "Starting manual deploy to environment: $OCR_ENVIRONMENT"
          REGION_ARGS=""
          if [[ "$ALL_REGION_IDS" == "true" ]]; then
            REGION_ARGS="--all-region-ids"
            if [[ -n "$REGION_ID" ]]; then
              echo "Note: all_region_ids=true, ignoring region_id='$REGION_ID'."
            fi
          elif [[ -n "$REGION_ID" ]]; then
            REGION_ARGS="--region-id $REGION_ID"
          else
            echo "Error: Provide 'region_id' or set 'all_region_ids' to true." >&2
            exit 1
          fi
          WIPE_ARG=""
          if [[ "$WIPE" == "true" ]]; then
            WIPE_ARG="--wipe"
          fi
          # Select env file based on OCR_ENVIRONMENT
          if [[ "$OCR_ENVIRONMENT" == "staging" ]]; then
            ENV_FILE="ocr-coiled-s3-staging.env"
          else
            ENV_FILE="ocr-coiled-s3.env"
          fi
          # Ensure OCR_ENVIRONMENT is set correctly in the selected env file
          if [[ ! -f "$ENV_FILE" ]]; then
            echo "Env file not found: $ENV_FILE" >&2
            exit 1
          fi
          export ENV_FILE
          pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
          pixi run ocr run \
            $REGION_ARGS \
            --env-file "$ENV_FILE" \
            $WIPE_ARG \
            --write-regional-stats \
            --create-pyramid \
            --platform coiled

  production:
    timeout-minutes: 240
    needs: ocr-coiled-software
    # Automatic production deploy on release publish
    if: github.event_name == 'release'
    environment:
      name: production
      url: ${{ env.PRODUCTION_URL }}
    runs-on: ubuntu-latest
    env:
      COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
    steps:
      - uses: actions/checkout@v6
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::631969445205:role/github-action-role
          role-session-name: ocr-etl-role-session
          aws-region: us-west-2
          # NOTE(review): 12000 s (200 min) is shorter than this job's
          # 240-minute timeout - confirm the credentials are not needed late in the run.
          role-duration-seconds: 12000
      - uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
      - run: pixi info
      - name: List installed libraries/packages
        run: |
          pixi list
      - name: Set OCR_VERSION from release tag
        id: set_version
        # The release tag is passed as an env var rather than interpolated into the script.
        env:
          RELEASE_TAG: ${{ github.event.release.tag_name }}
        run: |
          RAW_TAG="${RELEASE_TAG:-}"
          if [[ -z "$RAW_TAG" ]]; then
            RAW_TAG="${GITHUB_REF_NAME:-${GITHUB_REF#refs/tags/}}"
          fi
          # Normalize: drop a leading refs/tags/ and a leading "v" before validating.
          CLEAN="${RAW_TAG#refs/tags/}"
          CLEAN="${CLEAN#v}"
          SEMVER_REGEX='^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$'
          if [[ ! "$CLEAN" =~ $SEMVER_REGEX ]]; then
            echo "Normalized tag '$CLEAN' (from '$RAW_TAG') is not valid SemVer." >&2
            exit 1
          fi
          echo "OCR_VERSION=$CLEAN" >> "$GITHUB_ENV"
          echo "ocr_version=$CLEAN" >> "$GITHUB_OUTPUT"
          echo "OCR_ENVIRONMENT=production" >> "$GITHUB_ENV"
      - name: Show resolved version
        run: |
          echo "OCR_VERSION: $OCR_VERSION"
          echo "OCR_ENVIRONMENT: $OCR_ENVIRONMENT"
      - name: Inject OCR_VERSION and OCR_ENVIRONMENT into env file (production)
        env:
          ENV_FILE: ocr-coiled-s3-production.env
        run: |
          set -euo pipefail
          if [[ ! -f "$ENV_FILE" ]]; then
            echo "Env file not found: $ENV_FILE" >&2
            exit 1
          fi
          echo "Updating $ENV_FILE with OCR_VERSION=$OCR_VERSION and OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
          echo "Before update (matching lines):"
          grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
          pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_VERSION="$OCR_VERSION" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
          echo "After update (matching lines):"
          grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
      - name: Deploy (production all regions)
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
          OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
          OCR_ALLOW_ALL_PROCESSED: "true"
        run: |
          cat ocr-coiled-s3-production.env
          echo "Running production deployment for tag: $OCR_VERSION (environment=$OCR_ENVIRONMENT)"
          REGION_ARGS="--all-region-ids"
          pixi run ocr run \
            $REGION_ARGS \
            --env-file ocr-coiled-s3-production.env \
            --write-regional-stats \
            --create-pyramid \
            --platform coiled

  production-rerun:
    timeout-minutes: 240
    needs: ocr-coiled-software
    # Manual production redeploy via workflow_dispatch + production_tag input
    if: github.event_name == 'workflow_dispatch' && github.event.inputs.production_tag != ''
    environment:
      name: production
      url: ${{ env.PRODUCTION_URL }}
    runs-on: ubuntu-latest
    env:
      COILED_SOFTWARE_ENV_NAME: ${{ needs.ocr-coiled-software.outputs.name }}
    steps:
      - name: Validate provided tag format (basic pre-check)
        id: validate_input
        # The free-form tag input is passed as an env var, never as script text.
        env:
          PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
        run: |
          INPUT_TAG="${PRODUCTION_TAG:-}"
          if [[ -z "$INPUT_TAG" ]]; then
            echo "No production_tag provided." >&2
            exit 1
          fi
          echo "Provided production_tag: $INPUT_TAG"
      - name: Checkout specified tag
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.inputs.production_tag }}
          fetch-depth: 0
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::631969445205:role/github-action-role
          role-session-name: ocr-etl-role-session
          aws-region: us-west-2
          # NOTE(review): 12000 s (200 min) is shorter than this job's
          # 240-minute timeout - confirm the credentials are not needed late in the run.
          role-duration-seconds: 12000
      - uses: prefix-dev/setup-pixi@v0.9.4
        with:
          cache: false
          locked: false
          activate-environment: true
      - run: pixi info
      - name: List installed libraries/packages
        run: |
          pixi list
      - name: Derive & validate OCR_VERSION from input tag
        id: set_version
        env:
          PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
        run: |
          RAW_TAG="${PRODUCTION_TAG:-}"
          # Normalize: drop a leading refs/tags/ and a leading "v" before validating.
          CLEAN="${RAW_TAG#refs/tags/}"
          CLEAN="${CLEAN#v}"
          SEMVER_REGEX='^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?(\+[0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?$'
          if [[ ! "$CLEAN" =~ $SEMVER_REGEX ]]; then
            echo "Provided tag '$RAW_TAG' normalizes to '$CLEAN' which is not valid SemVer." >&2
            exit 1
          fi
          # Literal whole-line match (-F -x): '.' and '+' in a SemVer tag must not
          # be read as regex metacharacters when comparing against tags on HEAD.
          CURRENT_TAG_MATCH="$(git tag --points-at HEAD | grep -Fx -e "$RAW_TAG" -e "v${CLEAN}" -e "${CLEAN}" || true)"
          if [[ -z "$CURRENT_TAG_MATCH" ]]; then
            echo "Warning: HEAD does not appear to match the provided tag '$RAW_TAG' (continuing anyway)." >&2
          fi
          echo "OCR_VERSION=$CLEAN" >> "$GITHUB_ENV"
          echo "ocr_version=$CLEAN" >> "$GITHUB_OUTPUT"
          echo "OCR_ENVIRONMENT=production" >> "$GITHUB_ENV"
      - name: Show resolved version
        env:
          PRODUCTION_TAG: ${{ github.event.inputs.production_tag }}
        run: |
          echo "Redeploying production for OCR_VERSION=$OCR_VERSION (tag input=$PRODUCTION_TAG)"
          echo "OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
      - name: Inject OCR_VERSION and OCR_ENVIRONMENT into env file (production rerun)
        env:
          ENV_FILE: ocr-coiled-s3-production.env
        run: |
          set -euo pipefail
          if [[ ! -f "$ENV_FILE" ]]; then
            echo "Env file not found: $ENV_FILE" >&2
            exit 1
          fi
          echo "Updating $ENV_FILE with OCR_VERSION=$OCR_VERSION and OCR_ENVIRONMENT=$OCR_ENVIRONMENT"
          echo "Before update (matching lines):"
          grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
          pixi run python .github/scripts/update_env_file.py --env-file "$ENV_FILE" --set OCR_VERSION="$OCR_VERSION" --set OCR_ENVIRONMENT="$OCR_ENVIRONMENT"
          echo "After update (matching lines):"
          grep -E '^(OCR_VERSION|OCR_ENVIRONMENT)[[:space:]]*=' "$ENV_FILE" || true
      - name: Deploy (production redeploy all regions)
        env:
          DASK_COILED__TOKEN: ${{ secrets.DASK_COILED__TOKEN }}
          OCR_ENVIRONMENT: ${{ env.OCR_ENVIRONMENT }}
          OCR_ALLOW_ALL_PROCESSED: "true"
        run: |
          cat ocr-coiled-s3-production.env
          REGION_ARGS="--all-region-ids"
          pixi run ocr run \
            $REGION_ARGS \
            --env-file ocr-coiled-s3-production.env \
            --write-regional-stats \
            --create-pyramid \
            --platform coiled