# Incremental data sync #12201
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that incrementally syncs GitHub data into the destination schema.
name: Incremental data sync
on:
  # Allow manual runs from the Actions UI or API.
  workflow_dispatch: {}
  schedule:
    # Minute 30 of every second hour (00:30, 02:30, 04:30, ...).
    - cron: '30 */2 * * *'
# All runs of this workflow share one concurrency group, so only one sync runs
# at a time. A run that is already in progress is never cancelled by a newer
# one; the newer run waits for it instead.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false
jobs:
  # Single job: start a local Trino server with the trino-rest GitHub plugin,
  # then run the Sync tool against it for each configured owner.
  sync:
    runs-on: ubuntu-latest
    env:
      # Credentials, forwarded into the Trino container by name (`-e VAR`).
      GITHUB_TOKEN: ${{ secrets.PERSONAL_GITHUB_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: ${{ secrets.AWS_REGION }}
      # Versions are quoted so YAML keeps them as strings, not numbers.
      TRINO_VERSION: "459"
      TRINO_REST_VERSION: "0.155"
      # Settings forwarded into the Sync container by name (`-e VAR`).
      GITHUB_OWNER: trinodb
      GITHUB_REPO: trino
      TRINO_DEST_SCHEMA: trinocicd.v2
      TRINO_SRC_SCHEMA: github.default
      EMPTY_INSERT_LIMIT: "5"
      # TODO: artifacts are temporarily removed from this list because they
      # were causing OOMs; figure out a different way to fetch them.
      SYNC_TABLES: runs,jobs,steps,check_suites,check_runs,check_run_annotations,pulls,pull_commits,pull_stats,review_comments,reviews,issues,issue_comments,commits,teams,members
    steps:
      # The checkout provides the catalog/ and sql/ directories that are
      # mounted into the Trino container below.
      - uses: actions/checkout@v4

      - name: Download trino-rest
        run: |
          # -f makes curl fail on HTTP errors so a missing release stops the job.
          curl -fLOsS "https://github.com/nineinchnick/trino-rest/releases/download/v$TRINO_REST_VERSION/trino-rest-github-$TRINO_REST_VERSION.zip"
          unzip "trino-rest-github-$TRINO_REST_VERSION.zip"

      - name: Start Trino
        run: |
          # Single-node Trino configuration, mounted over the image's default.
          cat <<EOF >config.properties
          coordinator=true
          node-scheduler.include-coordinator=true
          http-server.http.port=8080
          discovery.uri=http://localhost:8080
          query.max-memory-per-node=4086929818B
          EOF
          # Make the cache directory writable for the user inside the container.
          mkdir -p "$(pwd)/hive-cache" && chmod go+w "$(pwd)/hive-cache"
          CONTAINER_ID=$(docker run \
            -v "$(pwd)/config.properties:/etc/trino/config.properties" \
            -v "$(pwd)/trino-rest-github-$TRINO_REST_VERSION:/usr/lib/trino/plugin/github" \
            -v "$(pwd)/catalog/github.properties:/etc/trino/catalog/github.properties" \
            -v "$(pwd)/catalog/trinocicd.properties:/etc/trino/catalog/trinocicd.properties" \
            -v "$(pwd)/hive-cache:/opt/hive-cache" \
            -v "$(pwd)/sql:/sql" \
            -e AWS_ACCESS_KEY_ID \
            -e AWS_SECRET_ACCESS_KEY \
            -e AWS_REGION \
            -e GITHUB_TOKEN \
            -p 8080:8080 \
            --name trino \
            -d \
            "trinodb/trino:$TRINO_VERSION")
          SERVER_IP=$(docker inspect --format '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$CONTAINER_ID")
          # Export both values so the following steps can reach the server.
          echo "CONTAINER_ID=$CONTAINER_ID" >> "$GITHUB_ENV"
          echo "SERVER_IP=$SERVER_IP" >> "$GITHUB_ENV"
          # Poll the container health status every 10 seconds and give up
          # once the retry counter reaches 10.
          i=0
          until docker inspect "${CONTAINER_ID}" --format "{{json .State.Health.Status }}" | grep -q '"healthy"'; do
            if [[ $((i++)) -ge 10 ]]; then
              echo "🚨 Too many retries waiting for Trino to start"
              exit 1
            fi
            sleep 10
          done

      - name: Run Sync
        run: |
          # Sync every table in SYNC_TABLES for the default owner/repo.
          docker run \
            -e GITHUB_OWNER \
            -e GITHUB_REPO \
            -e TRINO_DEST_SCHEMA \
            -e TRINO_SRC_SCHEMA \
            -e EMPTY_INSERT_LIMIT \
            -e SYNC_TABLES \
            -e TRINO_URL="jdbc:trino://$SERVER_IP:8080/github/default" \
            --rm \
            "nineinchnick/trino-rest:$TRINO_REST_VERSION" \
            java -cp "/usr/lib/trino/plugin/github/*" pl.net.was.rest.github.Sync

      - name: Run Sync for Starburst Teams
        run: |
          # Same Sync tool, with the owner overridden to starburstdata and the
          # table list narrowed to teams and members.
          docker run \
            -e GITHUB_OWNER=starburstdata \
            -e GITHUB_REPO \
            -e TRINO_DEST_SCHEMA \
            -e TRINO_SRC_SCHEMA \
            -e EMPTY_INSERT_LIMIT \
            -e SYNC_TABLES=teams,members \
            -e TRINO_URL="jdbc:trino://$SERVER_IP:8080/github/default" \
            --rm \
            "nineinchnick/trino-rest:$TRINO_REST_VERSION" \
            java -cp "/usr/lib/trino/plugin/github/*" pl.net.was.rest.github.Sync

      # Runs even when an earlier step failed, so the server logs are always
      # available for debugging.
      - name: Dump Trino logs
        if: always()
        run: |
          docker logs trino