Skip to content

Commit c5f14f9

Browse files
authored
Use planet-dump-ng for planet dump generation (#368)
* Add planet-dump-ng for planet file processing * Update scripts and container for planet-dump-ng * Update docker base image for planet dump and history * Update planet-dump-ng version * Update docker base image for planet dump * Compress dump files to reduce size of the files * Fix download dump function to support gz files
1 parent 746429f commit c5f14f9

File tree

10 files changed

+311
-151
lines changed

10 files changed

+311
-151
lines changed

compose/db-backup-restore.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
version: '3'
21
services:
32
#####################################################
43
## OSM Database backup and restore section
54
#####################################################
65
db-backup-restore:
7-
image: osmseed-backup-restore:v1
6+
image: rub21/osmseed-backup-restore:v1
87
build:
98
context: ../images/backup-restore
109
dockerfile: Dockerfile

images/backup-restore/start.sh

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,25 +35,27 @@ cloudStorageOps() {
3535
}
3636

3737
backupDB() {
38-
local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.sql.gz
39-
local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.sql.gz"
40-
if [ "$SET_DATE_AT_NAME" == "true" ]; then
41-
local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
42-
LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
43-
CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz"
44-
fi
38+
local LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}.dump"
39+
local LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}.dump.gz"
40+
local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump.gz"
4541

46-
# Backup database with max compression
47-
echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE}"
48-
pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE}
42+
if [ "$SET_DATE_AT_NAME" == "true" ]; then
43+
local CURRENT_DATE
44+
CURRENT_DATE=$(date '+%Y%m%d-%H%M')
45+
LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump"
46+
LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz"
47+
CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz"
48+
fi
4949

50-
# Handle cloud storage based on the provider
51-
cloudStorageOps "${LOCAL_BACKUP_FILE}" "${CLOUD_BACKUP_FILE}"
50+
# Backup database with pg_dump custom format (-Fc) + gzip
51+
echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE_GZIP}"
52+
pg_dump -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -Fc "${POSTGRES_DB}" | gzip -9 > "${LOCAL_BACKUP_FILE}.gz"
53+
cloudStorageOps "${LOCAL_BACKUP_FILE_GZIP}" "${CLOUD_BACKUP_FILE}"
5254
}
5355

5456
restoreDB() {
5557
local CURRENT_DATE=$(date '+%Y%m%d-%H%M')
56-
local RESTORE_FILE="backup.sql.gz"
58+
local RESTORE_FILE="backup.dump"
5759
local LOG_RESULT_FILE="restore_results-${CURRENT_DATE}.log"
5860
local flag=true
5961

@@ -62,7 +64,7 @@ restoreDB() {
6264
flag=false
6365
wget -O ${RESTORE_FILE} ${RESTORE_URL_FILE}
6466
echo "Restoring ${RESTORE_URL_FILE} in ${POSTGRES_DB}"
65-
gunzip -c <${RESTORE_FILE} | psql -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} | tee ${LOG_RESULT_FILE}
67+
pg_restore -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} --create --no-owner ${RESTORE_FILE} | tee ${LOG_RESULT_FILE}
6668
# aws s3 cp ${LOG_RESULT_FILE} s3://${AWS_S3_BUCKET}/${LOG_RESULT_FILE}
6769
echo "Import data to ${POSTGRES_DB} has finished ..."
6870
done

images/full-history/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
1+
FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b
22

33
VOLUME /mnt/data
44
COPY ./start.sh /
5-
CMD /start.sh
5+
CMD /start.sh

images/full-history/start.sh

Lines changed: 114 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,130 @@
11
#!/usr/bin/env bash
22
set -e
3-
export VOLUME_DIR=/mnt/data
43

54
# osmosis tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html
65
if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then
7-
echo JAVACMD_OPTIONS=\"-server\" >~/.osmosis
6+
echo JAVACMD_OPTIONS="-server" >~/.osmosis
87
else
98
memory="${MEMORY_JAVACMD_OPTIONS//i/}"
10-
echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis
9+
echo JAVACMD_OPTIONS="-server -Xmx$memory" >~/.osmosis
1110
fi
1211

13-
# Fixing name for historical file
12+
export VOLUME_DIR=/mnt/data
13+
export PLANET_EPOCH_DATE="${PLANET_EPOCH_DATE:-2004-01-01}"
1414
date=$(date '+%y%m%d_%H%M')
15-
local_fullHistoryFile=$VOLUME_DIR/history-${date}.osh.pbf
16-
cloud_fullHistoryFile=planet/full-history/history-${date}.osh.pbf
17-
18-
# In case overwrite the file
19-
if [ "$OVERWRITE_FHISTORY_FILE" == "true" ]; then
20-
local_fullHistoryFile=$VOLUME_DIR/history-latest.osh.pbf
21-
cloud_fullHistoryFile=planet/full-history/history-latest.osh.pbf
22-
fi
2315

24-
# State file nname
16+
local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf
17+
cloud_planetHistoryPBFFile=planet/full-history/planet-history-${date}.osm.pbf
2518
stateFile="$VOLUME_DIR/state.txt"
26-
osm_tmp_file="osm_tmp.osm"
27-
28-
# Creating full history
29-
osmosis --read-apidb-change \
30-
host=$POSTGRES_HOST \
31-
database=$POSTGRES_DB \
32-
user=$POSTGRES_USER \
33-
password=$POSTGRES_PASSWORD \
34-
validateSchemaVersion=no \
35-
readFullHistory=yes \
36-
--write-xml-change \
37-
compressionMethod=auto \
38-
$osm_tmp_file
39-
40-
# Convert file to PBF file
41-
osmium cat $osm_tmp_file -o $local_fullHistoryFile
42-
osmium fileinfo $local_fullHistoryFile
43-
44-
# Remove full-hitory osm file, keep only history-latest.osh.pbf files
45-
rm $osm_tmp_file
46-
47-
# AWS
48-
if [ $CLOUDPROVIDER == "aws" ]; then
49-
AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
50-
echo "$AWS_URL.s3.amazonaws.com/$cloud_fullHistoryFile" >$stateFile
51-
# Upload history-planet.osm.pbf
52-
aws s3 cp $local_fullHistoryFile $AWS_S3_BUCKET/$cloud_fullHistoryFile --acl public-read
53-
# Upload state.txt
54-
aws s3 cp $stateFile $AWS_S3_BUCKET/planet/full-history/state.txt --acl public-read
55-
fi
19+
dumpFile="$VOLUME_DIR/input-latest.dump"
20+
5621

57-
# Google Storage
58-
if [ $CLOUDPROVIDER == "gcp" ]; then
59-
echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_fullHistoryFile" >$stateFile
60-
# Upload history-planet.osm.pbf
61-
gsutil cp -a public-read $local_fullHistoryFile $GCP_STORAGE_BUCKET/$cloud_fullHistoryFile
62-
# Upload state.txt
63-
gsutil cp -a public-read $stateFile $GCP_STORAGE_BUCKET/planet/full-history/state.txt
22+
# If overwrite flag is enabled, use fixed filenames
23+
if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then
24+
local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf
25+
cloud_planetHistoryPBFFile=planet/planet-history-latest.osm.pbf
6426
fi
6527

66-
# Azure
67-
if [ $CLOUDPROVIDER == "azure" ]; then
68-
# Save the path file
69-
echo "https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$cloud_fullHistoryFile" >$stateFile
70-
# Upload history-planet.osm.pbf
71-
az storage blob upload \
72-
--container-name $AZURE_CONTAINER_NAME \
73-
--file $local_fullHistoryFile \
74-
--name $cloud_fullHistoryFile \
75-
--output table
76-
# Upload state.txt
77-
az storage blob upload \
78-
--container-name $AZURE_CONTAINER_NAME \
79-
--file $stateFile \
80-
--name planet/full-history/state.txt \
81-
--output table
28+
29+
# ===============================
30+
# Download db .dump file
31+
# ===============================
32+
download_dump_file() {
33+
echo "Downloading db .dump file from cloud..."
34+
35+
if [ "$CLOUDPROVIDER" == "aws" ]; then
36+
if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then
37+
temp_txt="$VOLUME_DIR/tmp_dump_url.txt"
38+
aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt"
39+
40+
# Get the first line (S3 URL to the .dump or .dump.gz file)
41+
first_line=$(head -n 1 "$temp_txt")
42+
echo "Found dump URL in txt: $first_line"
43+
44+
# Set dump file name based on extension
45+
if [[ "$first_line" == *.gz ]]; then
46+
dumpFile="${dumpFile}.gz"
47+
fi
48+
49+
aws s3 cp "$first_line" "$dumpFile"
50+
if [[ "$dumpFile" == *.gz ]]; then
51+
echo "Decompressing gzip file..."
52+
gunzip -f "$dumpFile"
53+
dumpFile="${dumpFile%.gz}"
54+
fi
55+
rm -f "$temp_txt"
56+
57+
else
58+
# Set dump file name based on extension
59+
if [[ "$DUMP_CLOUD_URL" == *.gz ]]; then
60+
dumpFile="${dumpFile}.gz"
61+
fi
62+
aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile"
63+
if [[ "$dumpFile" == *.gz ]]; then
64+
echo "Decompressing gzip file..."
65+
gunzip -f "$dumpFile"
66+
dumpFile="${dumpFile%.gz}"
67+
fi
68+
fi
69+
70+
elif [ "$CLOUDPROVIDER" == "gcp" ]; then
71+
gsutil cp "$DUMP_CLOUD_URL" "$dumpFile"
72+
else
73+
echo "Unsupported CLOUDPROVIDER: $CLOUDPROVIDER"
74+
exit 1
75+
fi
76+
77+
echo "Dump file ready at: $dumpFile"
78+
}
79+
80+
# ===============================
81+
# Upload planet + state
82+
# ===============================
83+
upload_planet_file() {
84+
echo "Uploading history planet file and updating state.txt..."
85+
86+
if [ "$CLOUDPROVIDER" == "aws" ]; then
87+
AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/}
88+
echo "$AWS_URL.s3.amazonaws.com/$cloud_planetHistoryPBFFile" > "$stateFile"
89+
aws s3 cp "$local_planetHistoryPBFFile" "$AWS_S3_BUCKET/$cloud_planetHistoryPBFFile" --acl public-read
90+
aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read
91+
92+
elif [ "$CLOUDPROVIDER" == "gcp" ]; then
93+
echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile" > "$stateFile"
94+
gsutil cp -a public-read "$local_planetHistoryPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile"
95+
gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt"
96+
fi
97+
}
98+
99+
# ===============================
100+
# Generate planet file
101+
# ===============================
102+
103+
if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then
104+
download_dump_file
105+
echo "Generating history planet file with planet-dump-ng..."
106+
export PLANET_EPOCH_DATE="$PLANET_EPOCH_DATE"
107+
planet-dump-ng \
108+
--dump-file "$dumpFile" \
109+
--history-pbf "$local_planetHistoryPBFFile"
110+
111+
elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then
112+
echo "Generating history planet file with osmosis..."
113+
# Creating full history
114+
osmosis --read-apidb-change \
115+
host=$POSTGRES_HOST \
116+
database=$POSTGRES_DB \
117+
user=$POSTGRES_USER \
118+
password=$POSTGRES_PASSWORD \
119+
validateSchemaVersion=no \
120+
readFullHistory=yes \
121+
--write-xml-change \
122+
compressionMethod=auto \
123+
$local_planetHistoryPBFFile
124+
else
125+
echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'."
126+
exit 1
82127
fi
128+
129+
# Upload results
130+
upload_planet_file

images/osm-processor/Dockerfile

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,59 @@
1+
# Stage 1: builder
2+
FROM debian:bookworm-slim AS builder
3+
WORKDIR /opt/planet-dump-ng
4+
5+
RUN set -ex \
6+
&& apt-get update \
7+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
8+
ca-certificates \
9+
build-essential \
10+
automake \
11+
autoconf \
12+
libxml2-dev \
13+
libboost-dev \
14+
libboost-program-options-dev \
15+
libboost-date-time-dev \
16+
libboost-filesystem-dev \
17+
libboost-thread-dev \
18+
libboost-iostreams-dev \
19+
libosmpbf-dev \
20+
osmpbf-bin \
21+
libprotobuf-dev \
22+
pkg-config \
23+
git \
24+
&& git clone -b planet_epoch_date https://github.com/OpenHistoricalMap/planet-dump-ng.git . \
25+
&& ./autogen.sh \
26+
&& ./configure \
27+
&& make \
28+
&& strip planet-dump-ng
29+
130
FROM debian:bookworm-slim
231
ENV workdir /mnt/data
332
WORKDIR $workdir
433

5-
# Installs osmosis v0.48.3, osmium-tool v1.15.0, and PostgreSQL client
634
RUN set -ex \
735
&& apt-get update \
8-
&& DEBIAN_FRONTEND=noninteractive apt-get install \
9-
-y --no-install-recommends \
10-
"osmosis" \
11-
"osmium-tool" \
12-
# Cloud provider CLIs
13-
"awscli" \
14-
"gsutil" \
15-
"azure-cli" \
16-
# PostgreSQL client
17-
"postgresql-client" \
18-
# Other useful packages
19-
"rsync" \
20-
"pyosmium" \
21-
"tmux" \
22-
"zsh" \
23-
&& rm -rf /var/lib/apt/lists/*
36+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
37+
osmosis \
38+
osmium-tool \
39+
awscli \
40+
gsutil \
41+
azure-cli \
42+
postgresql-client \
43+
rsync \
44+
pyosmium \
45+
tmux \
46+
zsh \
47+
git \
48+
libxml2 \
49+
libboost-filesystem1.74.0 \
50+
libboost-program-options1.74.0 \
51+
libboost-thread1.74.0 \
52+
libboost-iostreams1.74.0 \
53+
libboost-date-time1.74.0 \
54+
libprotobuf32 \
55+
libprotobuf-lite32 \
56+
libosmpbf1 \
57+
&& rm -rf /var/lib/apt/lists/*
58+
59+
COPY --from=builder /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng

images/planet-dump/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574
1+
FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b
22

33
VOLUME /mnt/data
44
COPY ./start.sh /

0 commit comments

Comments
 (0)